1/*- 2 * Copyright (c) 1998 Doug Rabson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: releng/10.3/sys/amd64/include/atomic.h 254618 2013-08-21 22:05:58Z jkim $ 27 */ 28#ifndef _MACHINE_ATOMIC_H_ 29#define _MACHINE_ATOMIC_H_ 30 31#ifndef _SYS_CDEFS_H_ 32#error this file needs sys/cdefs.h as a prerequisite 33#endif 34 35#define mb() __asm __volatile("mfence;" : : : "memory") 36#define wmb() __asm __volatile("sfence;" : : : "memory") 37#define rmb() __asm __volatile("lfence;" : : : "memory") 38 39/* 40 * Various simple operations on memory, each of which is atomic in the 41 * presence of interrupts and multiple processors. 42 * 43 * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) 44 * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) 45 * atomic_add_char(P, V) (*(u_char *)(P) += (V)) 46 * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) 47 * 48 * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) 49 * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) 50 * atomic_add_short(P, V) (*(u_short *)(P) += (V)) 51 * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) 52 * 53 * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) 54 * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) 55 * atomic_add_int(P, V) (*(u_int *)(P) += (V)) 56 * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) 57 * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) 58 * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) 59 * 60 * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) 61 * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) 62 * atomic_add_long(P, V) (*(u_long *)(P) += (V)) 63 * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) 64 * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) 65 * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) 66 */ 67 68/* 69 * The above functions are expanded inline in the statically-linked 70 * kernel. Lock prefixes are generated if an SMP kernel is being 71 * built. 72 * 73 * Kernel modules call real functions which are built into the kernel. 74 * This allows kernel modules to be portable between UP and SMP systems. 75 */ 76#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM) 77#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ 78void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ 79void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) 80 81int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); 82int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src); 83u_int atomic_fetchadd_int(volatile u_int *p, u_int v); 84u_long atomic_fetchadd_long(volatile u_long *p, u_long v); 85int atomic_testandset_int(volatile u_int *p, u_int v); 86int atomic_testandset_long(volatile u_long *p, u_int v); 87 88#define ATOMIC_LOAD(TYPE, LOP) \ 89u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) 90#define ATOMIC_STORE(TYPE) \ 91void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) 92 93#else /* !KLD_MODULE && __GNUCLIKE_ASM */ 94 95/* 96 * For userland, always use lock prefixes so that the binaries will run 97 * on both SMP and !SMP systems. 98 */ 99#if defined(SMP) || !defined(_KERNEL) 100#define MPLOCKED "lock ; " 101#else 102#define MPLOCKED 103#endif 104 105/* 106 * The assembly is volatilized to avoid code chunk removal by the compiler. 107 * GCC aggressively reorders operations and memory clobbering is necessary 108 * in order to avoid that for memory barriers. 109 */ 110#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ 111static __inline void \ 112atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ 113{ \ 114 __asm __volatile(MPLOCKED OP \ 115 : "+m" (*p) \ 116 : CONS (V) \ 117 : "cc"); \ 118} \ 119 \ 120static __inline void \ 121atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ 122{ \ 123 __asm __volatile(MPLOCKED OP \ 124 : "+m" (*p) \ 125 : CONS (V) \ 126 : "memory", "cc"); \ 127} \ 128struct __hack 129 130/* 131 * Atomic compare and set, used by the mutex functions 132 * 133 * if (*dst == expect) *dst = src (all 32 bit words) 134 * 135 * Returns 0 on failure, non-zero on success 136 */ 137 138static __inline int 139atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src) 140{ 141 u_char res; 142 143 __asm __volatile( 144 " " MPLOCKED " " 145 " cmpxchgl %3,%1 ; " 146 " sete %0 ; " 147 "# atomic_cmpset_int" 148 : "=q" (res), /* 0 */ 149 "+m" (*dst), /* 1 */ 150 "+a" (expect) /* 2 */ 151 : "r" (src) /* 3 */ 152 : "memory", "cc"); 153 return (res); 154} 155 156static __inline int 157atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src) 158{ 159 u_char res; 160 161 __asm __volatile( 162 " " MPLOCKED " " 163 " cmpxchgq %3,%1 ; " 164 " sete %0 ; " 165 "# atomic_cmpset_long" 166 : "=q" (res), /* 0 */ 167 "+m" (*dst), /* 1 */ 168 "+a" (expect) /* 2 */ 169 : "r" (src) /* 3 */ 170 : "memory", "cc"); 171 return (res); 172} 173 174/* 175 * Atomically add the value of v to the integer pointed to by p and return 176 * the previous value of *p. 177 */ 178static __inline u_int 179atomic_fetchadd_int(volatile u_int *p, u_int v) 180{ 181 182 __asm __volatile( 183 " " MPLOCKED " " 184 " xaddl %0,%1 ; " 185 "# atomic_fetchadd_int" 186 : "+r" (v), /* 0 */ 187 "+m" (*p) /* 1 */ 188 : : "cc"); 189 return (v); 190} 191 192/* 193 * Atomically add the value of v to the long integer pointed to by p and return 194 * the previous value of *p. 195 */ 196static __inline u_long 197atomic_fetchadd_long(volatile u_long *p, u_long v) 198{ 199 200 __asm __volatile( 201 " " MPLOCKED " " 202 " xaddq %0,%1 ; " 203 "# atomic_fetchadd_long" 204 : "+r" (v), /* 0 */ 205 "+m" (*p) /* 1 */ 206 : : "cc"); 207 return (v); 208} 209 210static __inline int 211atomic_testandset_int(volatile u_int *p, u_int v) 212{ 213 u_char res; 214 215 __asm __volatile( 216 " " MPLOCKED " " 217 " btsl %2,%1 ; " 218 " setc %0 ; " 219 "# atomic_testandset_int" 220 : "=q" (res), /* 0 */ 221 "+m" (*p) /* 1 */ 222 : "Ir" (v & 0x1f) /* 2 */ 223 : "cc"); 224 return (res); 225} 226 227static __inline int 228atomic_testandset_long(volatile u_long *p, u_int v) 229{ 230 u_char res; 231 232 __asm __volatile( 233 " " MPLOCKED " " 234 " btsq %2,%1 ; " 235 " setc %0 ; " 236 "# atomic_testandset_long" 237 : "=q" (res), /* 0 */ 238 "+m" (*p) /* 1 */ 239 : "Jr" ((u_long)(v & 0x3f)) /* 2 */ 240 : "cc"); 241 return (res); 242} 243 244/* 245 * We assume that a = b will do atomic loads and stores. Due to the 246 * IA32 memory model, a simple store guarantees release semantics. 247 * 248 * However, loads may pass stores, so for atomic_load_acq we have to 249 * ensure a Store/Load barrier to do the load in SMP kernels. We use 250 * "lock cmpxchg" as recommended by the AMD Software Optimization 251 * Guide, and not mfence. For UP kernels, however, the cache of the 252 * single processor is always consistent, so we only need to take care 253 * of the compiler. 254 */ 255#define ATOMIC_STORE(TYPE) \ 256static __inline void \ 257atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ 258{ \ 259 __compiler_membar(); \ 260 *p = v; \ 261} \ 262struct __hack 263 264#if defined(_KERNEL) && !defined(SMP) 265 266#define ATOMIC_LOAD(TYPE, LOP) \ 267static __inline u_##TYPE \ 268atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ 269{ \ 270 u_##TYPE tmp; \ 271 \ 272 tmp = *p; \ 273 __compiler_membar(); \ 274 return (tmp); \ 275} \ 276struct __hack 277 278#else /* !(_KERNEL && !SMP) */ 279 280#define ATOMIC_LOAD(TYPE, LOP) \ 281static __inline u_##TYPE \ 282atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ 283{ \ 284 u_##TYPE res; \ 285 \ 286 __asm __volatile(MPLOCKED LOP \ 287 : "=a" (res), /* 0 */ \ 288 "+m" (*p) /* 1 */ \ 289 : : "memory", "cc"); \ 290 return (res); \ 291} \ 292struct __hack 293 294#endif /* _KERNEL && !SMP */ 295 296#endif /* KLD_MODULE || !__GNUCLIKE_ASM */ 297 298ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); 299ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); 300ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); 301ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); 302 303ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); 304ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); 305ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); 306ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); 307 308ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); 309ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); 310ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); 311ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); 312 313ATOMIC_ASM(set, long, "orq %1,%0", "ir", v); 314ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v); 315ATOMIC_ASM(add, long, "addq %1,%0", "ir", v); 316ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v); 317 318ATOMIC_LOAD(char, "cmpxchgb %b0,%1"); 319ATOMIC_LOAD(short, "cmpxchgw %w0,%1"); 320ATOMIC_LOAD(int, "cmpxchgl %0,%1"); 321ATOMIC_LOAD(long, "cmpxchgq %0,%1"); 322 323ATOMIC_STORE(char); 324ATOMIC_STORE(short); 325ATOMIC_STORE(int); 326ATOMIC_STORE(long); 327 328#undef ATOMIC_ASM 329#undef ATOMIC_LOAD 330#undef ATOMIC_STORE 331 332#ifndef WANT_FUNCTIONS 333 334/* Read the current value and store a new value in the destination. */ 335#ifdef __GNUCLIKE_ASM 336 337static __inline u_int 338atomic_swap_int(volatile u_int *p, u_int v) 339{ 340 341 __asm __volatile( 342 " xchgl %1,%0 ; " 343 "# atomic_swap_int" 344 : "+r" (v), /* 0 */ 345 "+m" (*p)); /* 1 */ 346 return (v); 347} 348 349static __inline u_long 350atomic_swap_long(volatile u_long *p, u_long v) 351{ 352 353 __asm __volatile( 354 " xchgq %1,%0 ; " 355 "# atomic_swap_long" 356 : "+r" (v), /* 0 */ 357 "+m" (*p)); /* 1 */ 358 return (v); 359} 360 361#else /* !__GNUCLIKE_ASM */ 362 363u_int atomic_swap_int(volatile u_int *p, u_int v); 364u_long atomic_swap_long(volatile u_long *p, u_long v); 365 366#endif /* __GNUCLIKE_ASM */ 367 368#define atomic_set_acq_char atomic_set_barr_char 369#define atomic_set_rel_char atomic_set_barr_char 370#define atomic_clear_acq_char atomic_clear_barr_char 371#define atomic_clear_rel_char atomic_clear_barr_char 372#define atomic_add_acq_char atomic_add_barr_char 373#define atomic_add_rel_char atomic_add_barr_char 374#define atomic_subtract_acq_char atomic_subtract_barr_char 375#define atomic_subtract_rel_char atomic_subtract_barr_char 376 377#define atomic_set_acq_short atomic_set_barr_short 378#define atomic_set_rel_short atomic_set_barr_short 379#define atomic_clear_acq_short atomic_clear_barr_short 380#define atomic_clear_rel_short atomic_clear_barr_short 381#define atomic_add_acq_short atomic_add_barr_short 382#define atomic_add_rel_short atomic_add_barr_short 383#define atomic_subtract_acq_short atomic_subtract_barr_short 384#define atomic_subtract_rel_short atomic_subtract_barr_short 385 386#define atomic_set_acq_int atomic_set_barr_int 387#define atomic_set_rel_int atomic_set_barr_int 388#define atomic_clear_acq_int atomic_clear_barr_int 389#define atomic_clear_rel_int atomic_clear_barr_int 390#define atomic_add_acq_int atomic_add_barr_int 391#define atomic_add_rel_int atomic_add_barr_int 392#define atomic_subtract_acq_int atomic_subtract_barr_int 393#define atomic_subtract_rel_int atomic_subtract_barr_int 394#define atomic_cmpset_acq_int atomic_cmpset_int 395#define atomic_cmpset_rel_int atomic_cmpset_int 396 397#define atomic_set_acq_long atomic_set_barr_long 398#define atomic_set_rel_long atomic_set_barr_long 399#define atomic_clear_acq_long atomic_clear_barr_long 400#define atomic_clear_rel_long atomic_clear_barr_long 401#define atomic_add_acq_long atomic_add_barr_long 402#define atomic_add_rel_long atomic_add_barr_long 403#define atomic_subtract_acq_long atomic_subtract_barr_long 404#define atomic_subtract_rel_long atomic_subtract_barr_long 405#define atomic_cmpset_acq_long atomic_cmpset_long 406#define atomic_cmpset_rel_long atomic_cmpset_long 407 408#define atomic_readandclear_int(p) atomic_swap_int(p, 0) 409#define atomic_readandclear_long(p) atomic_swap_long(p, 0) 410 411/* Operations on 8-bit bytes. */ 412#define atomic_set_8 atomic_set_char 413#define atomic_set_acq_8 atomic_set_acq_char 414#define atomic_set_rel_8 atomic_set_rel_char 415#define atomic_clear_8 atomic_clear_char 416#define atomic_clear_acq_8 atomic_clear_acq_char 417#define atomic_clear_rel_8 atomic_clear_rel_char 418#define atomic_add_8 atomic_add_char 419#define atomic_add_acq_8 atomic_add_acq_char 420#define atomic_add_rel_8 atomic_add_rel_char 421#define atomic_subtract_8 atomic_subtract_char 422#define atomic_subtract_acq_8 atomic_subtract_acq_char 423#define atomic_subtract_rel_8 atomic_subtract_rel_char 424#define atomic_load_acq_8 atomic_load_acq_char 425#define atomic_store_rel_8 atomic_store_rel_char 426 427/* Operations on 16-bit words. */ 428#define atomic_set_16 atomic_set_short 429#define atomic_set_acq_16 atomic_set_acq_short 430#define atomic_set_rel_16 atomic_set_rel_short 431#define atomic_clear_16 atomic_clear_short 432#define atomic_clear_acq_16 atomic_clear_acq_short 433#define atomic_clear_rel_16 atomic_clear_rel_short 434#define atomic_add_16 atomic_add_short 435#define atomic_add_acq_16 atomic_add_acq_short 436#define atomic_add_rel_16 atomic_add_rel_short 437#define atomic_subtract_16 atomic_subtract_short 438#define atomic_subtract_acq_16 atomic_subtract_acq_short 439#define atomic_subtract_rel_16 atomic_subtract_rel_short 440#define atomic_load_acq_16 atomic_load_acq_short 441#define atomic_store_rel_16 atomic_store_rel_short 442 443/* Operations on 32-bit double words. */ 444#define atomic_set_32 atomic_set_int 445#define atomic_set_acq_32 atomic_set_acq_int 446#define atomic_set_rel_32 atomic_set_rel_int 447#define atomic_clear_32 atomic_clear_int 448#define atomic_clear_acq_32 atomic_clear_acq_int 449#define atomic_clear_rel_32 atomic_clear_rel_int 450#define atomic_add_32 atomic_add_int 451#define atomic_add_acq_32 atomic_add_acq_int 452#define atomic_add_rel_32 atomic_add_rel_int 453#define atomic_subtract_32 atomic_subtract_int 454#define atomic_subtract_acq_32 atomic_subtract_acq_int 455#define atomic_subtract_rel_32 atomic_subtract_rel_int 456#define atomic_load_acq_32 atomic_load_acq_int 457#define atomic_store_rel_32 atomic_store_rel_int 458#define atomic_cmpset_32 atomic_cmpset_int 459#define atomic_cmpset_acq_32 atomic_cmpset_acq_int 460#define atomic_cmpset_rel_32 atomic_cmpset_rel_int 461#define atomic_swap_32 atomic_swap_int 462#define atomic_readandclear_32 atomic_readandclear_int 463#define atomic_fetchadd_32 atomic_fetchadd_int 464#define atomic_testandset_32 atomic_testandset_int 465 466/* Operations on 64-bit quad words. */ 467#define atomic_set_64 atomic_set_long 468#define atomic_set_acq_64 atomic_set_acq_long 469#define atomic_set_rel_64 atomic_set_rel_long 470#define atomic_clear_64 atomic_clear_long 471#define atomic_clear_acq_64 atomic_clear_acq_long 472#define atomic_clear_rel_64 atomic_clear_rel_long 473#define atomic_add_64 atomic_add_long 474#define atomic_add_acq_64 atomic_add_acq_long 475#define atomic_add_rel_64 atomic_add_rel_long 476#define atomic_subtract_64 atomic_subtract_long 477#define atomic_subtract_acq_64 atomic_subtract_acq_long 478#define atomic_subtract_rel_64 atomic_subtract_rel_long 479#define atomic_load_acq_64 atomic_load_acq_long 480#define atomic_store_rel_64 atomic_store_rel_long 481#define atomic_cmpset_64 atomic_cmpset_long 482#define atomic_cmpset_acq_64 atomic_cmpset_acq_long 483#define atomic_cmpset_rel_64 atomic_cmpset_rel_long 484#define atomic_swap_64 atomic_swap_long 485#define atomic_readandclear_64 atomic_readandclear_long 486#define atomic_testandset_64 atomic_testandset_long 487 488/* Operations on pointers. */ 489#define atomic_set_ptr atomic_set_long 490#define atomic_set_acq_ptr atomic_set_acq_long 491#define atomic_set_rel_ptr atomic_set_rel_long 492#define atomic_clear_ptr atomic_clear_long 493#define atomic_clear_acq_ptr atomic_clear_acq_long 494#define atomic_clear_rel_ptr atomic_clear_rel_long 495#define atomic_add_ptr atomic_add_long 496#define atomic_add_acq_ptr atomic_add_acq_long 497#define atomic_add_rel_ptr atomic_add_rel_long 498#define atomic_subtract_ptr atomic_subtract_long 499#define atomic_subtract_acq_ptr atomic_subtract_acq_long 500#define atomic_subtract_rel_ptr atomic_subtract_rel_long 501#define atomic_load_acq_ptr atomic_load_acq_long 502#define atomic_store_rel_ptr atomic_store_rel_long 503#define atomic_cmpset_ptr atomic_cmpset_long 504#define atomic_cmpset_acq_ptr atomic_cmpset_acq_long 505#define atomic_cmpset_rel_ptr atomic_cmpset_rel_long 506#define atomic_swap_ptr atomic_swap_long 507#define atomic_readandclear_ptr atomic_readandclear_long 508 509#endif /* !WANT_FUNCTIONS */ 510 511#endif /* !_MACHINE_ATOMIC_H_ */ 512