/* ck_pr.h revision 332391 */
1/* 2 * Copyright 2009-2015 Samy Al Bahra. 3 * Copyright 2011 Devon H. O'Dell <devon.odell@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#ifndef CK_PR_X86_H 29#define CK_PR_X86_H 30 31#ifndef CK_PR_H 32#error Do not include this file directly, use ck_pr.h 33#endif 34 35#include <ck_cc.h> 36#include <ck_md.h> 37#include <ck_stdint.h> 38 39/* 40 * The following represent supported atomic operations. 41 * These operations may be emulated. 42 */ 43#include "ck_f_pr.h" 44 45/* Minimum requirements for the CK_PR interface are met. */ 46#define CK_F_PR 47 48/* 49 * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined 50 * delay". 
 */
CK_CC_INLINE static void
ck_pr_stall(void)
{
	/* PAUSE is the spin-wait hint; the "memory" clobber also makes this a compiler barrier. */
	__asm__ __volatile__("pause" ::: "memory");
	return;
}

#ifdef CK_MD_UMP
/*
 * Uniprocessor build: atomic read-modify-write instructions need no bus
 * lock, and every strict fence reduces to a compiler barrier (empty asm
 * with a "memory" clobber).
 */
#define CK_PR_LOCK_PREFIX
#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__("" ::: "memory");	\
		return;					\
	}
#else
/*
 * Multiprocessor build: atomic RMW instructions carry the lock prefix and
 * each strict fence emits instruction string I.
 */
#define CK_PR_LOCK_PREFIX "lock "
#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__(I ::: "memory");	\
		return;					\
	}
#endif /* CK_MD_UMP */

#if defined(CK_MD_SSE_DISABLE)
/*
 * If SSE is disabled, then use atomic operations for serialization:
 * a locked add of zero to the top of the stack stands in for all three
 * fence instructions.
 */
#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)"
#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE
#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE
#else
#define CK_MD_X86_SFENCE "sfence"
#define CK_MD_X86_LFENCE "lfence"
#define CK_MD_X86_MFENCE "mfence"
#endif /* !CK_MD_SSE_DISABLE */

/*
 * Fences ordering against atomic operations expand to an empty instruction
 * string, i.e. a compiler barrier only; the remaining orderings emit the
 * fence instruction selected above.
 */
CK_PR_FENCE(atomic, "")
CK_PR_FENCE(atomic_store, "")
CK_PR_FENCE(atomic_load, "")
CK_PR_FENCE(store_atomic, "")
CK_PR_FENCE(load_atomic, "")
CK_PR_FENCE(load, CK_MD_X86_LFENCE)
CK_PR_FENCE(load_store, CK_MD_X86_MFENCE)
CK_PR_FENCE(store, CK_MD_X86_SFENCE)
CK_PR_FENCE(store_load, CK_MD_X86_MFENCE)
CK_PR_FENCE(memory, CK_MD_X86_MFENCE)
CK_PR_FENCE(release, CK_MD_X86_MFENCE)
CK_PR_FENCE(acquire, CK_MD_X86_MFENCE)
CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE)
CK_PR_FENCE(lock, CK_MD_X86_MFENCE)
CK_PR_FENCE(unlock, CK_MD_X86_MFENCE)

#undef CK_PR_FENCE

/*
 * Atomic fetch-and-store
 */
/*
 * CK_PR_FAS(S, M, T, C, I): emit ck_pr_fas_S(M *target, T v), which
 * atomically exchanges *target with v and returns the previous value.
 *   S - function-name suffix, M - pointed-to type of target, T - value type,
 *   C - type target is cast to for the memory operand, I - xchg mnemonic for
 *   the operand width.
 * No CK_PR_LOCK_PREFIX here: xchg with a memory operand is implicitly
 * locked on x86.
 */
#define CK_PR_FAS(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_fas_##S(M *target, T v)				\
	{							\
		__asm__ __volatile__(I " %0, %1"		\
					: "+m" (*(C *)target),	\
					  "+q" (v)		\
					:			\
					: "memory");		\
		return v;					\
	}

CK_PR_FAS(ptr, void, void *, char, "xchgl")

/* Same-type convenience wrapper: M, T and C are all T. */
#define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I)

CK_PR_FAS_S(char, char, "xchgb")
CK_PR_FAS_S(uint, unsigned int, "xchgl")
CK_PR_FAS_S(int, int, "xchgl")
CK_PR_FAS_S(32, uint32_t, "xchgl")
CK_PR_FAS_S(16, uint16_t, "xchgw")
CK_PR_FAS_S(8, uint8_t, "xchgb")

#undef CK_PR_FAS_S
#undef CK_PR_FAS

/*
 * CK_PR_LOAD(S, M, T, C, I): emit ck_pr_md_load_S(const M *target), a
 * plain single-instruction load of *target into a value of type T.
 */
#define CK_PR_LOAD(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_md_load_##S(const M *target)			\
	{							\
		T r;						\
		__asm__ __volatile__(I " %1, %0"		\
					: "=q" (r)		\
					: "m" (*(const C *)target)	\
					: "memory");		\
		return (r);					\
	}

CK_PR_LOAD(ptr, void, void *, char, "movl")

#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)

CK_PR_LOAD_S(char, char, "movb")
CK_PR_LOAD_S(uint, unsigned int, "movl")
CK_PR_LOAD_S(int, int, "movl")
CK_PR_LOAD_S(32, uint32_t, "movl")
CK_PR_LOAD_S(16, uint16_t, "movw")
CK_PR_LOAD_S(8, uint8_t, "movb")

#undef CK_PR_LOAD_S
#undef CK_PR_LOAD

/*
 * CK_PR_STORE(S, M, T, C, I): emit ck_pr_md_store_S(M *target, T v), a
 * plain single-instruction store of v to *target.  CK_CC_IMM additionally
 * permits an immediate source operand.
 */
#define CK_PR_STORE(S, M, T, C, I)				\
	CK_CC_INLINE static void				\
	ck_pr_md_store_##S(M *target, T v)			\
	{							\
		__asm__ __volatile__(I " %1, %0"		\
					: "=m" (*(C *)target)	\
					: CK_CC_IMM "q" (v)	\
					: "memory");		\
		return;						\
	}

CK_PR_STORE(ptr, void, const void *, char, "movl")

#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)

CK_PR_STORE_S(char, char, "movb")
CK_PR_STORE_S(uint, unsigned int, "movl")
CK_PR_STORE_S(int, int, "movl")
CK_PR_STORE_S(32, uint32_t, "movl")
CK_PR_STORE_S(16, uint16_t, "movw")
CK_PR_STORE_S(8, uint8_t, "movb")

#undef CK_PR_STORE_S
#undef CK_PR_STORE

/*
 * Atomic fetch-and-add
operations.
 */
/*
 * CK_PR_FAA(S, M, T, C, I): emit ck_pr_faa_S(M *target, T d), which
 * atomically adds d to *target (lock xadd) and returns the value *target
 * held before the addition.
 */
#define CK_PR_FAA(S, M, T, C, I)					\
	CK_CC_INLINE static T						\
	ck_pr_faa_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target),		\
					  "+q" (d)			\
					:				\
					: "memory", "cc");		\
		return (d);						\
	}

CK_PR_FAA(ptr, void, uintptr_t, char, "xaddl")

#define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I)

CK_PR_FAA_S(char, char, "xaddb")
CK_PR_FAA_S(uint, unsigned int, "xaddl")
CK_PR_FAA_S(int, int, "xaddl")
CK_PR_FAA_S(32, uint32_t, "xaddl")
CK_PR_FAA_S(16, uint16_t, "xaddw")
CK_PR_FAA_S(8, uint8_t, "xaddb")

#undef CK_PR_FAA_S
#undef CK_PR_FAA

/*
 * Atomic store-only unary operations.
 */
/*
 * CK_PR_UNARY expands to two functions per (K, S) pair:
 *   ck_pr_K_S(target)         - CK_PR_UNARY_R: apply unary instruction K
 *                               to *target under the lock prefix.
 *   ck_pr_K_S_zero(target, r) - CK_PR_UNARY_V: same, and set *r to true
 *                               iff the result is zero (captured via setz).
 */
#define CK_PR_UNARY(K, S, T, C, I)	\
	CK_PR_UNARY_R(K, S, T, C, I)	\
	CK_PR_UNARY_V(K, S, T, C, I)

#define CK_PR_UNARY_R(K, S, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S(T *target)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0"		\
					: "+m" (*(C *)target)		\
					:				\
					: "memory", "cc");		\
		return;							\
	}

#define CK_PR_UNARY_V(K, S, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S##_zero(T *target, bool *r)			\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"	\
					: "+m" (*(C *)target),		\
					  "=m" (*r)			\
					:				\
					: "memory", "cc");		\
		return;							\
	}


#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)

/* Instantiate ck_pr_K_* for every supported operand width. */
#define CK_PR_GENERATE(K)				\
	CK_PR_UNARY(K, ptr, void, char, #K "l")		\
	CK_PR_UNARY_S(K, char, char, #K "b")		\
	CK_PR_UNARY_S(K, int, int, #K "l")		\
	CK_PR_UNARY_S(K, uint, unsigned int, #K "l")	\
	CK_PR_UNARY_S(K, 32, uint32_t, #K "l")		\
	CK_PR_UNARY_S(K, 16, uint16_t, #K "w")		\
	CK_PR_UNARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(inc)
CK_PR_GENERATE(dec)
CK_PR_GENERATE(neg)

/* not does not affect condition flags.
 */
/*
 * Because "not" leaves ZF untouched, a _zero variant would report garbage;
 * stub CK_PR_UNARY_V out to nothing before generating the "not" family so
 * only ck_pr_not_* (no ck_pr_not_*_zero) is emitted.
 */
#undef CK_PR_UNARY_V
#define CK_PR_UNARY_V(a, b, c, d, e)
CK_PR_GENERATE(not)

#undef CK_PR_GENERATE
#undef CK_PR_UNARY_S
#undef CK_PR_UNARY_V
#undef CK_PR_UNARY_R
#undef CK_PR_UNARY

/*
 * Atomic store-only binary operations.
 */
/*
 * CK_PR_BINARY(K, S, M, T, C, I): emit ck_pr_K_S(M *target, T d), which
 * atomically applies binary instruction K with source operand d to *target.
 * Store-only: no result is returned.  CK_CC_IMM permits an immediate
 * source operand.
 */
#define CK_PR_BINARY(K, S, M, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target)		\
					: CK_CC_IMM "q" (d)		\
					: "memory", "cc");		\
		return;							\
	}

#define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I)

#define CK_PR_GENERATE(K)					\
	CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "l")	\
	CK_PR_BINARY_S(K, char, char, #K "b")			\
	CK_PR_BINARY_S(K, int, int, #K "l")			\
	CK_PR_BINARY_S(K, uint, unsigned int, #K "l")		\
	CK_PR_BINARY_S(K, 32, uint32_t, #K "l")			\
	CK_PR_BINARY_S(K, 16, uint16_t, #K "w")			\
	CK_PR_BINARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(add)
CK_PR_GENERATE(sub)
CK_PR_GENERATE(and)
CK_PR_GENERATE(or)
CK_PR_GENERATE(xor)

#undef CK_PR_GENERATE
#undef CK_PR_BINARY_S
#undef CK_PR_BINARY

/*
 * Atomic compare and swap.
 */
/*
 * CK_PR_CAS(S, M, T, C, I): emit ck_pr_cas_S(target, compare, set), a
 * lock cmpxchg that installs set into *target iff *target == compare.
 * Returns true on success (ZF captured via setz).  compare is pinned to
 * the accumulator ("a") as cmpxchg requires.
 */
#define CK_PR_CAS(S, M, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S(M *target, T compare, T set)			\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1"	\
					: "+m" (*(C *)target),		\
					  "=a" (z)			\
					: "q" (set),			\
					  "a" (compare)			\
					: "memory", "cc");		\
		return z;						\
	}

CK_PR_CAS(ptr, void, void *, char, "cmpxchgl")

#define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I)

CK_PR_CAS_S(char, char, "cmpxchgb")
CK_PR_CAS_S(int, int, "cmpxchgl")
CK_PR_CAS_S(uint, unsigned int, "cmpxchgl")
CK_PR_CAS_S(32, uint32_t, "cmpxchgl")
CK_PR_CAS_S(16, uint16_t, "cmpxchgw")
CK_PR_CAS_S(8, uint8_t, "cmpxchgb")

#undef CK_PR_CAS_S
#undef CK_PR_CAS

/*
 * Compare and swap, set *v to old value of target.
 */
/*
 * CK_PR_CAS_O(S, M, T, C, I, R): as CK_PR_CAS, but additionally copies the
 * value cmpxchg leaves in the accumulator out to *v (on failure that is
 * the observed value of *target).
 *   I - instruction width suffix ("b"/"w"/"l"),
 *   R - matching accumulator register name ("al"/"ax"/"eax").
 */
#define CK_PR_CAS_O(S, M, T, C, I, R)						\
	CK_CC_INLINE static bool						\
	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)		\
	{									\
		bool z;								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;"	\
				     "mov %% " R ", %2;"			\
				     "setz %1;"					\
					: "+m" (*(C *)target),			\
					  "=a" (z),				\
					  "=m" (*(C *)v)			\
					: "q" (set),				\
					  "a" (compare)				\
					: "memory", "cc");			\
		return (bool)z;							\
	}

CK_PR_CAS_O(ptr, void, void *, char, "l", "eax")

#define CK_PR_CAS_O_S(S, T, I, R)	\
	CK_PR_CAS_O(S, T, T, T, I, R)

CK_PR_CAS_O_S(char, char, "b", "al")
CK_PR_CAS_O_S(int, int, "l", "eax")
CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
CK_PR_CAS_O_S(8, uint8_t, "b", "al")

#undef CK_PR_CAS_O_S
#undef CK_PR_CAS_O

/*
 * Atomic bit test operations.
377 */ 378#define CK_PR_BT(K, S, T, P, C, I) \ 379 CK_CC_INLINE static bool \ 380 ck_pr_##K##_##S(T *target, unsigned int b) \ 381 { \ 382 bool c; \ 383 __asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1" \ 384 : "+m" (*(C *)target), \ 385 "=q" (c) \ 386 : "q" ((P)b) \ 387 : "memory", "cc"); \ 388 return (bool)c; \ 389 } 390 391#define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I) 392 393#define CK_PR_GENERATE(K) \ 394 CK_PR_BT(K, ptr, void, uint32_t, char, #K "l %2, %0") \ 395 CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ 396 CK_PR_BT_S(K, int, int, #K "l %2, %0") \ 397 CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ 398 CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0") 399 400CK_PR_GENERATE(btc) 401CK_PR_GENERATE(bts) 402CK_PR_GENERATE(btr) 403 404#undef CK_PR_GENERATE 405#undef CK_PR_BT 406 407#endif /* CK_PR_X86_H */ 408 409