/*
 * Copyright 2009-2015 Samy Al Bahra.
 * Copyright 2011 Devon H. O'Dell <devon.odell@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * x86 (32-bit) implementation of the ck_pr atomic-primitive interface.
 * Everything here is built from gcc-style extended inline assembly;
 * this header must only be pulled in through ck_pr.h, which supplies
 * the surrounding dispatch machinery.
 */
#ifndef CK_PR_X86_H
#define CK_PR_X86_H

#ifndef CK_PR_H
#error Do not include this file directly, use ck_pr.h
#endif

#include <ck_cc.h>
#include <ck_md.h>
#include <ck_stdint.h>

/*
 * The following represent supported atomic operations.
 * These operations may be emulated.
 */
#include "ck_f_pr.h"

/* Minimum requirements for the CK_PR interface are met. */
#define CK_F_PR

/*
 * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
 * delay.
 */
CK_CC_INLINE static void
ck_pr_stall(void)
{
	/* "pause" hints the spin; the "memory" clobber doubles as a
	 * compiler barrier so spin-loop loads are not hoisted. */
	__asm__ __volatile__("pause" ::: "memory");
	return;
}

/*
 * CK_PR_FENCE(T, I) emits ck_pr_fence_strict_T() built around the fence
 * instruction I.  On uniprocessor (CK_MD_UMP) builds no bus locking or
 * hardware fencing is required: the lock prefix is empty and every fence
 * degenerates to a compiler-only barrier (empty asm + "memory" clobber).
 */
#ifdef CK_MD_UMP
#define CK_PR_LOCK_PREFIX
#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__("" ::: "memory");	\
		return;					\
	}
#else
#define CK_PR_LOCK_PREFIX "lock "
#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__(I ::: "memory");	\
		return;					\
	}
#endif /* CK_MD_UMP */

#if defined(CK_MD_SSE_DISABLE)
/* If SSE is disabled, then use atomic operations for serialization. */
#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)"
#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE
#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE
#else
#define CK_MD_X86_SFENCE "sfence"
#define CK_MD_X86_LFENCE "lfence"
#define CK_MD_X86_MFENCE "mfence"
#endif /* !CK_MD_SSE_DISABLE */

/*
 * Fences whose instruction argument is "" compile down to a compiler
 * barrier only; the locked atomic operations above already provide the
 * required hardware ordering on x86 (see Intel SDM memory-ordering
 * rules).  The remaining fences use the instructions selected above.
 */
CK_PR_FENCE(atomic, "")
CK_PR_FENCE(atomic_store, "")
CK_PR_FENCE(atomic_load, "")
CK_PR_FENCE(store_atomic, "")
CK_PR_FENCE(load_atomic, "")
CK_PR_FENCE(load, CK_MD_X86_LFENCE)
CK_PR_FENCE(load_store, CK_MD_X86_MFENCE)
CK_PR_FENCE(store, CK_MD_X86_SFENCE)
CK_PR_FENCE(store_load, CK_MD_X86_MFENCE)
CK_PR_FENCE(memory, CK_MD_X86_MFENCE)
CK_PR_FENCE(release, CK_MD_X86_MFENCE)
CK_PR_FENCE(acquire, CK_MD_X86_MFENCE)
CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE)
CK_PR_FENCE(lock, CK_MD_X86_MFENCE)
CK_PR_FENCE(unlock, CK_MD_X86_MFENCE)

#undef CK_PR_FENCE

/*
 * Atomic fetch-and-store operations.
 */
/*
 * ck_pr_fas_S(target, v): atomically exchange v with *target and return
 * the previous value.  No CK_PR_LOCK_PREFIX is emitted because xchg with
 * a memory operand is implicitly locked on x86.
 *   S - name suffix, M - pointer type, T - value type,
 *   C - type used to access memory, I - xchg instruction mnemonic.
 */
#define CK_PR_FAS(S, M, T, C, I)			\
	CK_CC_INLINE static T				\
	ck_pr_fas_##S(M *target, T v)			\
	{						\
		__asm__ __volatile__(I " %0, %1"	\
					: "+m" (*(C *)target),	\
					  "+q" (v)		\
					:			\
					: "memory");		\
		return v;				\
	}

CK_PR_FAS(ptr, void, void *, uint32_t, "xchgl")

#define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I)

CK_PR_FAS_S(char, char, "xchgb")
CK_PR_FAS_S(uint, unsigned int, "xchgl")
CK_PR_FAS_S(int, int, "xchgl")
CK_PR_FAS_S(32, uint32_t, "xchgl")
CK_PR_FAS_S(16, uint16_t, "xchgw")
CK_PR_FAS_S(8, uint8_t, "xchgb")

#undef CK_PR_FAS_S
#undef CK_PR_FAS

/*
 * ck_pr_md_load_S(target): read *target with a plain mov.  The "memory"
 * clobber keeps the compiler from reordering the load against other
 * memory accesses.
 */
#define CK_PR_LOAD(S, M, T, C, I)			\
	CK_CC_INLINE static T				\
	ck_pr_md_load_##S(const M *target)		\
	{						\
		T r;					\
		__asm__ __volatile__(I " %1, %0"	\
					: "=q" (r)			\
					: "m" (*(const C *)target)	\
					: "memory");			\
		return (r);				\
	}

CK_PR_LOAD(ptr, void, void *, uint32_t, "movl")

#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)

CK_PR_LOAD_S(char, char, "movb")
CK_PR_LOAD_S(uint, unsigned int, "movl")
CK_PR_LOAD_S(int, int, "movl")
CK_PR_LOAD_S(32, uint32_t, "movl")
CK_PR_LOAD_S(16, uint16_t, "movw")
CK_PR_LOAD_S(8, uint8_t, "movb")

#undef CK_PR_LOAD_S
#undef CK_PR_LOAD

/*
 * ck_pr_md_store_S(target, v): write v to *target with a plain mov.
 * CK_CC_IMM allows an immediate operand where the value is a constant.
 */
#define CK_PR_STORE(S, M, T, C, I)			\
	CK_CC_INLINE static void			\
	ck_pr_md_store_##S(M *target, T v)		\
	{						\
		__asm__ __volatile__(I " %1, %0"	\
					: "=m" (*(C *)target)	\
					: CK_CC_IMM "q" (v)	\
					: "memory");		\
		return;					\
	}

CK_PR_STORE(ptr, void, const void *, uint32_t, "movl")

#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)

CK_PR_STORE_S(char, char, "movb")
CK_PR_STORE_S(uint, unsigned int, "movl")
CK_PR_STORE_S(int, int, "movl")
CK_PR_STORE_S(32, uint32_t, "movl")
CK_PR_STORE_S(16, uint16_t, "movw")
CK_PR_STORE_S(8, uint8_t, "movb")

#undef CK_PR_STORE_S
#undef CK_PR_STORE

/*
 * Atomic
fetch-and-add operations.
 */
/*
 * ck_pr_faa_S(target, d): atomically add d to *target via lock xadd and
 * return the value *target held before the addition (xadd exchanges the
 * operands, so d comes back holding the old value).
 */
#define CK_PR_FAA(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_faa_##S(M *target, T d)				\
	{							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target),	\
					  "+q" (d)		\
					:			\
					: "memory", "cc");	\
		return (d);					\
	}

CK_PR_FAA(ptr, void, uintptr_t, uint32_t, "xaddl")

#define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I)

CK_PR_FAA_S(char, char, "xaddb")
CK_PR_FAA_S(uint, unsigned int, "xaddl")
CK_PR_FAA_S(int, int, "xaddl")
CK_PR_FAA_S(32, uint32_t, "xaddl")
CK_PR_FAA_S(16, uint16_t, "xaddw")
CK_PR_FAA_S(8, uint8_t, "xaddb")

#undef CK_PR_FAA_S
#undef CK_PR_FAA

/*
 * Atomic store-only unary operations.
 *
 * CK_PR_UNARY expands to two flavors per operation K:
 *   ck_pr_K_S(target)         - locked read-modify-write, no result.
 *   ck_pr_K_S_is_zero(target) - same, additionally reporting via setz
 *                               whether the result was zero.
 */
#define CK_PR_UNARY(K, S, T, C, I)		\
	CK_PR_UNARY_R(K, S, T, C, I)		\
	CK_PR_UNARY_V(K, S, T, C, I)

#define CK_PR_UNARY_R(K, S, T, C, I)				\
	CK_CC_INLINE static void				\
	ck_pr_##K##_##S(T *target)				\
	{							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0"	\
					: "+m" (*(C *)target)	\
					:			\
					: "memory", "cc");	\
		return;						\
	}

#define CK_PR_UNARY_V(K, S, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_##K##_##S##_is_zero(T *target)				\
	{								\
		bool ret;						\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"	\
					: "+m" (*(C *)target),		\
					  "=qm" (ret)			\
					:				\
					: "memory", "cc");		\
		return ret;						\
	}

#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)

/* Instantiate operation K for every supported operand width. */
#define CK_PR_GENERATE(K)					\
	CK_PR_UNARY(K, ptr, void, uint32_t, #K "l")		\
	CK_PR_UNARY_S(K, char, char, #K "b")			\
	CK_PR_UNARY_S(K, int, int, #K "l")			\
	CK_PR_UNARY_S(K, uint, unsigned int, #K "l")		\
	CK_PR_UNARY_S(K, 32, uint32_t, #K "l")			\
	CK_PR_UNARY_S(K, 16, uint16_t, #K "w")			\
	CK_PR_UNARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(inc)
CK_PR_GENERATE(dec)
CK_PR_GENERATE(neg)

/* not does not affect condition flags.
 */
/*
 * "not" cannot provide an _is_zero variant (setz would read stale
 * flags), so CK_PR_UNARY_V is emptied out before generating it.
 */
#undef CK_PR_UNARY_V
#define CK_PR_UNARY_V(a, b, c, d, e)
CK_PR_GENERATE(not)

#undef CK_PR_GENERATE
#undef CK_PR_UNARY_S
#undef CK_PR_UNARY_V
#undef CK_PR_UNARY_R
#undef CK_PR_UNARY

/*
 * Atomic store-only binary operations.
 *
 * ck_pr_K_S(target, d): locked "*target = *target K d"; no value is
 * returned.  CK_CC_IMM lets constant operands be encoded as immediates.
 */
#define CK_PR_BINARY(K, S, M, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target)		\
					: CK_CC_IMM "q" (d)		\
					: "memory", "cc");		\
		return;							\
	}

#define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I)

#define CK_PR_GENERATE(K)					\
	CK_PR_BINARY(K, ptr, void, uintptr_t, uint32_t, #K "l")	\
	CK_PR_BINARY_S(K, char, char, #K "b")			\
	CK_PR_BINARY_S(K, int, int, #K "l")			\
	CK_PR_BINARY_S(K, uint, unsigned int, #K "l")		\
	CK_PR_BINARY_S(K, 32, uint32_t, #K "l")			\
	CK_PR_BINARY_S(K, 16, uint16_t, #K "w")			\
	CK_PR_BINARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(add)
CK_PR_GENERATE(sub)
CK_PR_GENERATE(and)
CK_PR_GENERATE(or)
CK_PR_GENERATE(xor)

#undef CK_PR_GENERATE
#undef CK_PR_BINARY_S
#undef CK_PR_BINARY

/*
 * Atomic compare and swap, with a variant that sets *v to the old value of target.
 *
 * ck_pr_cas_S(target, compare, set): lock cmpxchg; returns true iff the
 * swap happened.  The _value variant also stores the value observed in
 * *target into *v (cmpxchg leaves it in the accumulator on failure and
 * the accumulator still holds `compare` on success).  When the compiler
 * supports asm flag outputs (__GCC_ASM_FLAG_OUTPUTS__), ZF is read
 * directly via "=@ccz" instead of a separate setz.
 */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
#define CK_PR_CAS(S, M, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S(M *target, T compare, T set)			\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0"	\
					: "+m"    (*(C *)target),	\
					  "=@ccz" (z),			\
					  /* EAX is clobbered by cmpxchg. */ \
					  "+a"    (compare)		\
					: "q"     (set)			\
					: "memory", "cc");		\
		return z;						\
	}								\
									\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)	\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"	\
					: "+m"    (*(C *)target),	\
					  "=@ccz" (z),			\
					  "+a"    (compare)		\
					: "q"     (set)			\
					: "memory", "cc");		\
		*(T *)v = compare;					\
		return z;						\
	}
#else
#define CK_PR_CAS(S, M, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S(M *target, T compare, T set)			\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1"	\
					: "+m" (*(C *)target),		\
					  "=a" (z)			\
					: "q"  (set),			\
					  "a"  (compare)		\
					: "memory", "cc");		\
		return z;						\
	}								\
									\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)	\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"	\
				     "setz %1;"				\
					: "+m" (*(C *)target),		\
					  "=q" (z),			\
					  "+a" (compare)		\
					: "q"  (set)			\
					: "memory", "cc");		\
		*(T *)v = compare;					\
		return z;						\
	}
#endif

CK_PR_CAS(ptr, void, void *, uint32_t, "cmpxchgl")

#define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I)

CK_PR_CAS_S(char, char, "cmpxchgb")
CK_PR_CAS_S(int, int, "cmpxchgl")
CK_PR_CAS_S(uint, unsigned int, "cmpxchgl")
CK_PR_CAS_S(32, uint32_t, "cmpxchgl")
CK_PR_CAS_S(16, uint16_t, "cmpxchgw")
CK_PR_CAS_S(8, uint8_t, "cmpxchgb")

#undef CK_PR_CAS_S
#undef CK_PR_CAS

/*
 * Atomic bit test operations.
 */
/*
 * ck_pr_K_S(target, b): locked bit test-and-{complement,set,reset} of
 * bit b in *target; returns the bit's previous value, captured from CF
 * via setc.  Only 16-bit, 32-bit and pointer widths are generated (the
 * bt* instructions have no byte-sized form).
 */
#define CK_PR_BT(K, S, T, P, C, I)				\
	CK_CC_INLINE static bool				\
	ck_pr_##K##_##S(T *target, unsigned int b)		\
	{							\
		bool c;						\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1"	\
					: "+m" (*(C *)target),	\
					  "=q" (c)		\
					: "q" ((P)b)		\
					: "memory", "cc");	\
		return (bool)c;					\
	}

#define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I)

/* %w2 forces the 16-bit register name for the word-sized variant. */
#define CK_PR_GENERATE(K)						\
	CK_PR_BT(K, ptr, void, uint32_t, uint32_t, #K "l %2, %0")	\
	CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0")		\
	CK_PR_BT_S(K, int, int, #K "l %2, %0")				\
	CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0")			\
	CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0")

CK_PR_GENERATE(btc)
CK_PR_GENERATE(bts)
CK_PR_GENERATE(btr)

#undef CK_PR_GENERATE
#undef CK_PR_BT

#endif /* CK_PR_X86_H */