1251524Sed/*- 2251524Sed * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 3251524Sed * All rights reserved. 4251524Sed * 5251524Sed * Copyright (c) 1998 Doug Rabson 6251524Sed * All rights reserved. 7251524Sed * 8251524Sed * Redistribution and use in source and binary forms, with or without 9251524Sed * modification, are permitted provided that the following conditions 10251524Sed * are met: 11251524Sed * 1. Redistributions of source code must retain the above copyright 12251524Sed * notice, this list of conditions and the following disclaimer. 13251524Sed * 2. Redistributions in binary form must reproduce the above copyright 14251524Sed * notice, this list of conditions and the following disclaimer in the 15251524Sed * documentation and/or other materials provided with the distribution. 16251524Sed * 17251524Sed * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18251524Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19251524Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20251524Sed * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21251524Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22251524Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23251524Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24251524Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25251524Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26251524Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27251524Sed * SUCH DAMAGE. 28251524Sed */ 29251524Sed 30251524Sed#include <sys/cdefs.h> 31251524Sed__FBSDID("$FreeBSD$"); 32251524Sed 33251696Sed#include <sys/stdatomic.h> 34251524Sed#include <sys/types.h> 35251524Sed 36251524Sed#ifdef _KERNEL 37251524Sed#include "opt_global.h" 38251524Sed#endif 39251524Sed 40251696Sed#if defined(__SYNC_ATOMICS) 41251696Sed 42251524Sed/* 43251524Sed * Memory barriers. 44251524Sed * 45251524Sed * It turns out __sync_synchronize() does not emit any code when used 46251524Sed * with GCC 4.2. Implement our own version that does work reliably. 47251524Sed * 48251524Sed * Although __sync_lock_test_and_set() should only perform an acquire 49251524Sed * barrier, make it do a full barrier like the other functions. This 50251524Sed * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably. 51251524Sed */ 52251524Sed 53251524Sedstatic inline void 54251696Seddo_sync(void) 55251524Sed{ 56251524Sed 57251524Sed __asm volatile ( 58251524Sed#if !defined(_KERNEL) || defined(SMP) 59251524Sed ".set noreorder\n" 60251524Sed "\tsync\n" 61251524Sed "\tnop\n" 62251524Sed "\tnop\n" 63251524Sed "\tnop\n" 64251524Sed "\tnop\n" 65251524Sed "\tnop\n" 66251524Sed "\tnop\n" 67251524Sed "\tnop\n" 68251524Sed "\tnop\n" 69251524Sed ".set reorder\n" 70251524Sed#else /* _KERNEL && !SMP */ 71251524Sed "" 72251524Sed#endif /* !KERNEL || SMP */ 73251524Sed : : : "memory"); 74251524Sed} 75251524Sed 76251524Sedtypedef union { 77251524Sed uint8_t v8[4]; 78251524Sed uint32_t v32; 79251524Sed} reg_t; 80251524Sed 81251559Sed/* 82251559Sed * Given a memory address pointing to an 8-bit or 16-bit integer, return 83251559Sed * the address of the 32-bit word containing it. 84251559Sed */ 85251559Sed 86251539Sedstatic inline uint32_t * 87251539Sedround_to_word(void *ptr) 88251539Sed{ 89251539Sed 90251539Sed return ((uint32_t *)((intptr_t)ptr & ~3)); 91251539Sed} 92251539Sed 93251524Sed/* 94251559Sed * Utility functions for loading and storing 8-bit and 16-bit integers 95251559Sed * in 32-bit words at an offset corresponding with the location of the 96251559Sed * atomic variable. 97251524Sed */ 98251524Sed 99251539Sedstatic inline void 100251696Sedput_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) 101251539Sed{ 102251539Sed size_t offset; 103251539Sed 104251539Sed offset = (intptr_t)offset_ptr & 3; 105251539Sed r->v8[offset] = val; 106251539Sed} 107251539Sed 108251539Sedstatic inline uint8_t 109251696Sedget_1(const reg_t *r, const uint8_t *offset_ptr) 110251539Sed{ 111251539Sed size_t offset; 112251539Sed 113251539Sed offset = (intptr_t)offset_ptr & 3; 114251539Sed return (r->v8[offset]); 115251539Sed} 116251539Sed 117251539Sedstatic inline void 118251696Sedput_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) 119251539Sed{ 120251539Sed size_t offset; 121251539Sed union { 122251539Sed uint16_t in; 123251539Sed uint8_t out[2]; 124251539Sed } bytes; 125251539Sed 126251539Sed offset = (intptr_t)offset_ptr & 3; 127251539Sed bytes.in = val; 128251539Sed r->v8[offset] = bytes.out[0]; 129251539Sed r->v8[offset + 1] = bytes.out[1]; 130251539Sed} 131251539Sed 132251539Sedstatic inline uint16_t 133251696Sedget_2(const reg_t *r, const uint16_t *offset_ptr) 134251539Sed{ 135251539Sed size_t offset; 136251539Sed union { 137251539Sed uint8_t in[2]; 138251539Sed uint16_t out; 139251539Sed } bytes; 140251539Sed 141251539Sed offset = (intptr_t)offset_ptr & 3; 142251539Sed bytes.in[0] = r->v8[offset]; 143251539Sed bytes.in[1] = r->v8[offset + 1]; 144251539Sed return (bytes.out); 145251539Sed} 146251539Sed 147251559Sed/* 148251559Sed * 8-bit and 16-bit routines. 149251559Sed * 150251559Sed * These operations are not natively supported by the CPU, so we use 151251559Sed * some shifting and bitmasking on top of the 32-bit instructions. 152251559Sed */ 153251524Sed 154251559Sed#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ 155251559SeduintN_t \ 156251559Sed__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \ 157251559Sed{ \ 158251559Sed uint32_t *mem32; \ 159251559Sed reg_t val32, negmask, old; \ 160251559Sed uint32_t temp; \ 161251559Sed \ 162251559Sed mem32 = round_to_word(mem); \ 163251559Sed val32.v32 = 0x00000000; \ 164251559Sed put_##N(&val32, mem, val); \ 165251559Sed negmask.v32 = 0xffffffff; \ 166251559Sed put_##N(&negmask, mem, 0); \ 167251559Sed \ 168251696Sed do_sync(); \ 169251559Sed __asm volatile ( \ 170251559Sed "1:" \ 171251559Sed "\tll %0, %5\n" /* Load old value. */ \ 172251559Sed "\tand %2, %4, %0\n" /* Remove the old value. */ \ 173251559Sed "\tor %2, %3\n" /* Put in the new value. */ \ 174251559Sed "\tsc %2, %1\n" /* Attempt to store. */ \ 175251559Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ \ 176251559Sed : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp) \ 177251559Sed : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ 178251559Sed return (get_##N(&old, mem)); \ 179251524Sed} 180251524Sed 181251559SedEMIT_LOCK_TEST_AND_SET_N(1, uint8_t) 182251559SedEMIT_LOCK_TEST_AND_SET_N(2, uint16_t) 183251524Sed 184251559Sed#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 185251559SeduintN_t \ 186251559Sed__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ 187251559Sed uintN_t desired) \ 188251559Sed{ \ 189251559Sed uint32_t *mem32; \ 190251781Sed reg_t expected32, desired32, posmask, old; \ 191251781Sed uint32_t negmask, temp; \ 192251559Sed \ 193251559Sed mem32 = round_to_word(mem); \ 194251559Sed expected32.v32 = 0x00000000; \ 195251559Sed put_##N(&expected32, mem, expected); \ 196251559Sed desired32.v32 = 0x00000000; \ 197251559Sed put_##N(&desired32, mem, desired); \ 198251559Sed posmask.v32 = 0x00000000; \ 199251559Sed put_##N(&posmask, mem, ~0); \ 200251781Sed negmask = ~posmask.v32; \ 201251559Sed \ 202251696Sed do_sync(); \ 203251559Sed __asm volatile ( \ 204251559Sed "1:" \ 205251559Sed "\tll %0, %7\n" /* Load old value. */ \ 206251559Sed "\tand %2, %5, %0\n" /* Isolate the old value. */ \ 207251559Sed "\tbne %2, %3, 2f\n" /* Compare to expected value. */\ 208251559Sed "\tand %2, %6, %0\n" /* Remove the old value. */ \ 209251559Sed "\tor %2, %4\n" /* Put in the new value. */ \ 210251559Sed "\tsc %2, %1\n" /* Attempt to store. */ \ 211251559Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ \ 212251559Sed "2:" \ 213251559Sed : "=&r" (old), "=m" (*mem32), "=&r" (temp) \ 214251559Sed : "r" (expected32.v32), "r" (desired32.v32), \ 215251781Sed "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \ 216251559Sed return (get_##N(&old, mem)); \ 217251524Sed} 218251524Sed 219251559SedEMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) 220251559SedEMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) 221251559Sed 222251559Sed#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ 223251559SeduintN_t \ 224251559Sed__sync_##name##_##N(uintN_t *mem, uintN_t val) \ 225251524Sed{ \ 226251524Sed uint32_t *mem32; \ 227251781Sed reg_t val32, posmask, old; \ 228251781Sed uint32_t negmask, temp1, temp2; \ 229251524Sed \ 230251559Sed mem32 = round_to_word(mem); \ 231251524Sed val32.v32 = 0x00000000; \ 232251559Sed put_##N(&val32, mem, val); \ 233251559Sed posmask.v32 = 0x00000000; \ 234251559Sed put_##N(&posmask, mem, ~0); \ 235251781Sed negmask = ~posmask.v32; \ 236251524Sed \ 237251696Sed do_sync(); \ 238251524Sed __asm volatile ( \ 239251524Sed "1:" \ 240251524Sed "\tll %0, %7\n" /* Load old value. */ \ 241251524Sed "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 242251559Sed "\tand %2, %5\n" /* Isolate the new value. */ \ 243251559Sed "\tand %3, %6, %0\n" /* Remove the old value. */ \ 244251559Sed "\tor %2, %3\n" /* Put in the new value. */ \ 245251524Sed "\tsc %2, %1\n" /* Attempt to store. */ \ 246251524Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ \ 247251524Sed : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 248251524Sed "=&r" (temp2) \ 249251781Sed : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \ 250251781Sed "m" (*mem32)); \ 251251559Sed return (get_##N(&old, mem)); \ 252251524Sed} 253251524Sed 254251559SedEMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "addu") 255251559SedEMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "subu") 256251559SedEMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "addu") 257251559SedEMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "subu") 258251524Sed 259251559Sed#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ 260251559SeduintN_t \ 261251559Sed__sync_##name##_##N(uintN_t *mem, uintN_t val) \ 262251524Sed{ \ 263251524Sed uint32_t *mem32; \ 264251524Sed reg_t val32, old; \ 265251524Sed uint32_t temp; \ 266251524Sed \ 267251559Sed mem32 = round_to_word(mem); \ 268251524Sed val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ 269251559Sed put_##N(&val32, mem, val); \ 270251524Sed \ 271251696Sed do_sync(); \ 272251524Sed __asm volatile ( \ 273251524Sed "1:" \ 274251524Sed "\tll %0, %4\n" /* Load old value. */ \ 275251524Sed "\t"op" %2, %3, %0\n" /* Calculate new value. */ \ 276251524Sed "\tsc %2, %1\n" /* Attempt to store. */ \ 277251524Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ \ 278251524Sed : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp) \ 279251524Sed : "r" (val32.v32), "m" (*mem32)); \ 280251559Sed return (get_##N(&old, mem)); \ 281251524Sed} 282251524Sed 283251559SedEMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) 284251559SedEMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "or", 0) 285251559SedEMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "xor", 0) 286251559SedEMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) 287251559SedEMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "or", 0) 288251559SedEMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "xor", 0) 289251524Sed 290251524Sed/* 291251524Sed * 32-bit routines. 292251524Sed */ 293251524Sed 294251524Seduint32_t 295251524Sed__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected, 296251524Sed uint32_t desired) 297251524Sed{ 298251524Sed uint32_t old, temp; 299251524Sed 300251696Sed do_sync(); 301251524Sed __asm volatile ( 302251524Sed "1:" 303251524Sed "\tll %0, %5\n" /* Load old value. */ 304251524Sed "\tbne %0, %3, 2f\n" /* Compare to expected value. */ 305251524Sed "\tmove %2, %4\n" /* Value to store. */ 306251524Sed "\tsc %2, %1\n" /* Attempt to store. */ 307251524Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ 308251524Sed "2:" 309251524Sed : "=&r" (old), "=m" (*mem), "=&r" (temp) 310251524Sed : "r" (expected), "r" (desired), "m" (*mem)); 311251524Sed return (old); 312251524Sed} 313251524Sed 314251524Sed#define EMIT_FETCH_AND_OP_4(name, op) \ 315251524Seduint32_t \ 316251524Sed__sync_##name##_4(uint32_t *mem, uint32_t val) \ 317251524Sed{ \ 318251524Sed uint32_t old, temp; \ 319251524Sed \ 320251696Sed do_sync(); \ 321251524Sed __asm volatile ( \ 322251524Sed "1:" \ 323251524Sed "\tll %0, %4\n" /* Load old value. */ \ 324251524Sed "\t"op"\n" /* Calculate new value. */ \ 325251524Sed "\tsc %2, %1\n" /* Attempt to store. */ \ 326251524Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ \ 327251524Sed : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 328251524Sed : "r" (val), "m" (*mem)); \ 329251524Sed return (old); \ 330251524Sed} 331251524Sed 332251524SedEMIT_FETCH_AND_OP_4(lock_test_and_set, "move %2, %3") 333251524SedEMIT_FETCH_AND_OP_4(fetch_and_add, "addu %2, %0, %3") 334251524SedEMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %3") 335251524SedEMIT_FETCH_AND_OP_4(fetch_and_or, "or %2, %0, %3") 336251524SedEMIT_FETCH_AND_OP_4(fetch_and_sub, "subu %2, %0, %3") 337251524SedEMIT_FETCH_AND_OP_4(fetch_and_xor, "xor %2, %0, %3") 338251524Sed 339251524Sed/* 340251524Sed * 64-bit routines. 341251524Sed * 342251524Sed * Note: All the 64-bit atomic operations are only atomic when running 343251524Sed * in 64-bit mode. It is assumed that code compiled for n32 and n64 fits 344251524Sed * into this definition and no further safeties are needed. 345251524Sed */ 346251524Sed 347251524Sed#if defined(__mips_n32) || defined(__mips_n64) 348251524Sed 349251524Seduint64_t 350251524Sed__sync_val_compare_and_swap_8(uint64_t *mem, uint64_t expected, 351251524Sed uint64_t desired) 352251524Sed{ 353251524Sed uint64_t old, temp; 354251524Sed 355251696Sed do_sync(); 356251524Sed __asm volatile ( 357251524Sed "1:" 358251524Sed "\tlld %0, %5\n" /* Load old value. */ 359251524Sed "\tbne %0, %3, 2f\n" /* Compare to expected value. */ 360251524Sed "\tmove %2, %4\n" /* Value to store. */ 361251524Sed "\tscd %2, %1\n" /* Attempt to store. */ 362251524Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ 363251524Sed "2:" 364251524Sed : "=&r" (old), "=m" (*mem), "=&r" (temp) 365251524Sed : "r" (expected), "r" (desired), "m" (*mem)); 366251524Sed return (old); 367251524Sed} 368251524Sed 369251524Sed#define EMIT_FETCH_AND_OP_8(name, op) \ 370251524Seduint64_t \ 371251524Sed__sync_##name##_8(uint64_t *mem, uint64_t val) \ 372251524Sed{ \ 373251524Sed uint64_t old, temp; \ 374251524Sed \ 375251696Sed do_sync(); \ 376251524Sed __asm volatile ( \ 377251524Sed "1:" \ 378251524Sed "\tlld %0, %4\n" /* Load old value. */ \ 379251524Sed "\t"op"\n" /* Calculate new value. */ \ 380251524Sed "\tscd %2, %1\n" /* Attempt to store. */ \ 381251524Sed "\tbeqz %2, 1b\n" /* Spin if failed. */ \ 382251524Sed : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 383251524Sed : "r" (val), "m" (*mem)); \ 384251524Sed return (old); \ 385251524Sed} 386251524Sed 387251524SedEMIT_FETCH_AND_OP_8(lock_test_and_set, "move %2, %3") 388251524SedEMIT_FETCH_AND_OP_8(fetch_and_add, "daddu %2, %0, %3") 389251524SedEMIT_FETCH_AND_OP_8(fetch_and_and, "and %2, %0, %3") 390251524SedEMIT_FETCH_AND_OP_8(fetch_and_or, "or %2, %0, %3") 391251524SedEMIT_FETCH_AND_OP_8(fetch_and_sub, "dsubu %2, %0, %3") 392251524SedEMIT_FETCH_AND_OP_8(fetch_and_xor, "xor %2, %0, %3") 393251524Sed 394251524Sed#endif /* __mips_n32 || __mips_n64 */ 395251696Sed 396251696Sed#endif /* __SYNC_ATOMICS */ 397