atomic.c revision 1.4
/* TILE atomics.
   Copyright (C) 2011-2016 Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "tconfig.h"
#include "coretypes.h"
#include "atomic.h"

#define bool unsigned char

/* This code should be inlined by the compiler, but for now support
   it as out-of-line methods in libgcc.  */

static inline void
pre_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

static inline void
post_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

#define __unused __attribute__((unused))

#define __fetch_and_do(proto, type, size, opname, top, bottom) \
proto \
{ \
  top; \
  type rv = arch_atomic_##opname(p, i); \
  bottom; \
  return rv; \
}

#define __atomic_fetch_and_do(type, size, opname) \
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
                 type, size, opname, \
                 pre_atomic_barrier(model), \
                 post_atomic_barrier(model)) \

__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)

#define __sync_fetch_and_do(type, size, opname) \
  __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
                 type, size, opname, \
                 arch_atomic_write_barrier(), \
                 arch_atomic_read_barrier())

__sync_fetch_and_do (int, 4, add)
__sync_fetch_and_do (int, 4, sub)
__sync_fetch_and_do (int, 4, or)
__sync_fetch_and_do (int, 4, and)
__sync_fetch_and_do (int, 4, xor)
__sync_fetch_and_do (int, 4, nand)
__sync_fetch_and_do (long long, 8, add)
__sync_fetch_and_do (long long, 8, sub)
__sync_fetch_and_do (long long, 8, or)
__sync_fetch_and_do (long long, 8, and)
__sync_fetch_and_do (long long, 8, xor)
__sync_fetch_and_do (long long, 8, nand)

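/* For illustration only: the (int, 4, add) instantiation above expands
   to approximately this out-of-line function (a sketch, not a verbatim
   preprocessor expansion):

     int
     __atomic_fetch_add_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = arch_atomic_add (p, i);
       post_atomic_barrier (model);
       return rv;
     }

   The value previously stored at *p is returned, and the fences implied
   by MODEL are issued around the architecture-level atomic operation.  */
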
#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom) \
proto \
{ \
  top; \
  type rv = op2 (arch_atomic_##opname(p, i) op i); \
  bottom; \
  return rv; \
}

#define __atomic_do_and_fetch(type, size, opname, op, op2) \
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
                 type, size, opname, op, op2, \
                 pre_atomic_barrier(model), \
                 post_atomic_barrier(model)) \

__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)

#define __sync_do_and_fetch(type, size, opname, op, op2) \
  __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
                 type, size, opname, op, op2, \
                 arch_atomic_write_barrier(), \
                 arch_atomic_read_barrier()) \

__sync_do_and_fetch (int, 4, add, +, )
__sync_do_and_fetch (int, 4, sub, -, )
__sync_do_and_fetch (int, 4, or, |, )
__sync_do_and_fetch (int, 4, and, &, )
__sync_do_and_fetch (int, 4, xor, ^, )
__sync_do_and_fetch (int, 4, nand, &, ~)
__sync_do_and_fetch (long long, 8, add, +, )
__sync_do_and_fetch (long long, 8, sub, -, )
__sync_do_and_fetch (long long, 8, or, |, )
__sync_do_and_fetch (long long, 8, and, &, )
__sync_do_and_fetch (long long, 8, xor, ^, )
__sync_do_and_fetch (long long, 8, nand, &, ~)

#define __atomic_exchange_methods(type, size) \
bool \
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \
                                 type newval, bool weak __unused, \
                                 int models, int modelf __unused) \
{ \
  type oldval = *oldvalp; \
  pre_atomic_barrier(models); \
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  post_atomic_barrier(models); \
  bool success = (retval == oldval); \
  *oldvalp = retval; \
  return success; \
} \
 \
type \
__atomic_exchange_##size(volatile type* ptr, type val, int model) \
{ \
  pre_atomic_barrier(model); \
  type retval = arch_atomic_exchange(ptr, val); \
  post_atomic_barrier(model); \
  return retval; \
}

__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)

#define __sync_exchange_methods(type, size) \
type \
__sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{ \
  arch_atomic_write_barrier(); \
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier(); \
  return retval; \
} \
 \
bool \
__sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{ \
  arch_atomic_write_barrier(); \
  bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier(); \
  return retval; \
} \
 \
type \
__sync_lock_test_and_set_##size(type* ptr, type val) \
{ \
  type retval = arch_atomic_exchange(ptr, val); \
  arch_atomic_acquire_barrier_value(retval); \
  return retval; \
}

__sync_exchange_methods (int, 4)
__sync_exchange_methods (long long, 8)

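/* Rough caller-side sketch of the word-sized compare-exchange above.
   The variable names are hypothetical and not part of this file:

     int expected = old;
     bool ok = __atomic_compare_exchange_4 (&word, &expected, newval,
                                            0, __ATOMIC_SEQ_CST,
                                            __ATOMIC_SEQ_CST);

   If ok is false, no store took place and expected has been overwritten
   with the value actually observed in word, so the caller can retry
   without issuing a separate load.  */
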
#ifdef __LITTLE_ENDIAN__
#define BIT_OFFSET(n, type) ((n) * 8)
#else
#define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
#endif

/* Subword methods require the same approach for both TILEPro and
   TILE-Gx.  We load the background data for the word, insert the
   desired subword piece, then compare-and-exchange it into place.  */
#define u8 unsigned char
#define u16 unsigned short

#define __subword_cmpxchg_body(type, size, ptr, guess, val) \
  ({ \
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
    const unsigned int bgmask = ~(valmask << shift); \
    unsigned int oldword = *p; \
    type oldval = (oldword >> shift) & valmask; \
    if (__builtin_expect((oldval == guess), 1)) { \
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      oldword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
      oldval = (oldword >> shift) & valmask; \
    } \
    oldval; \
  }) \

#define __atomic_subword_cmpxchg(type, size) \
 \
bool \
__atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr, \
                                 type val, bool weak __unused, int models, \
                                 int modelf __unused) \
{ \
  pre_atomic_barrier(models); \
  type guess = *guess_ptr; \
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \
  post_atomic_barrier(models); \
  bool success = (oldval == guess); \
  *guess_ptr = oldval; \
  return success; \
}

__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)

#define __sync_subword_cmpxchg(type, size) \
 \
type \
__sync_val_compare_and_swap_##size(type* ptr, type guess, type val) \
{ \
  arch_atomic_write_barrier(); \
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \
  arch_atomic_read_barrier(); \
  return oldval; \
} \
 \
bool \
__sync_bool_compare_and_swap_##size(type* ptr, type guess, type val) \
{ \
  type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val); \
  return oldval == guess; \
}

__sync_subword_cmpxchg (u8, 1)
__sync_subword_cmpxchg (u16, 2)

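/* Worked example of the subword masking above, assuming a little-endian
   target and a u16 located at byte offset 2 within its aligned word:

     shift   = BIT_OFFSET (2, u16)   = 16
     valmask = (1 << 16) - 1         = 0x0000ffff
     bgmask  = ~(valmask << shift)   = 0x0000ffff

   The replacement word is (oldword & bgmask) | ((val & valmask) << shift),
   i.e. the untouched low halfword (the "background") is preserved and only
   the addressed halfword is replaced before the compare-and-exchange.  */
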
/* For the atomic-update subword methods, we use the same approach as
   above, but we retry until we succeed if the compare-and-exchange
   fails.  */
#define __subword(type, proto, top, expr, bottom) \
proto \
{ \
  top \
  unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
  const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \
  const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
  const unsigned int bgmask = ~(valmask << shift); \
  unsigned int oldword, xword = *p; \
  type val, oldval; \
  do { \
    oldword = xword; \
    oldval = (oldword >> shift) & valmask; \
    val = expr; \
    unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
    xword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
  } while (__builtin_expect(xword != oldword, 0)); \
  bottom \
}

#define __atomic_subword_fetch(type, funcname, expr, retval) \
  __subword(type, \
            type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
            pre_atomic_barrier(model);, \
            expr, \
            post_atomic_barrier(model); return retval;)

__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)

__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)

__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)

__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)

#define __sync_subword_fetch(type, funcname, expr, retval) \
  __subword(type, \
            type __sync_ ## funcname(type *ptr, type i), \
            arch_atomic_read_barrier();, \
            expr, \
            arch_atomic_write_barrier(); return retval;)

__sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
__sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
__sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
__sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
__sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
__sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)

__sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
__sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
__sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
__sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
__sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
__sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)

__sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
__sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
__sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
__sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
__sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
__sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)

__sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
__sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
__sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
__sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
__sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
__sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)

#define __atomic_subword_lock(type, size) \
  __subword(type, \
            type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
            pre_atomic_barrier(model);, \
            nval, \
            post_atomic_barrier(model); return oldval;)

__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)

#define __sync_subword_lock(type, size) \
  __subword(type, \
            type __sync_lock_test_and_set_##size(type* ptr, type nval), \
            , \
            nval, \
            arch_atomic_acquire_barrier_value(oldval); return oldval;)

__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)

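/* For reference, the (u8, 1) instantiation just above expands to roughly
   the following (condensed; the mask and shift setup is the same as in
   the __subword macro body):

     u8
     __sync_lock_test_and_set_1 (u8 *ptr, u8 nval)
     {
       ... derive p, shift, valmask, bgmask from ptr ...
       do
         {
           oldword = xword;
           oldval = (oldword >> shift) & valmask;
           word = (oldword & bgmask) | ((nval & valmask) << shift);
           xword = arch_atomic_val_compare_and_exchange (p, oldword, word);
         }
       while (xword != oldword);
       arch_atomic_acquire_barrier_value (oldval);
       return oldval;
     }

   The previous subword value is returned with acquire semantics, as the
   __sync_lock_test_and_set interface requires.  */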