1/* $NetBSD: atomic.S,v 1.29 2022/07/30 14:11:00 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <machine/asm.h>

/*
 * In the kernel the public names must bind directly to these
 * implementations, so use strong aliases; in userland they are weak
 * so that they may be overridden.
 */
#ifdef _KERNEL
#define ALIAS(f, t) STRONG_ALIAS(f,t)
#else
#define ALIAS(f, t) WEAK_ALIAS(f,t)
#endif

	.text

/*
 * SysV AMD64 ABI conventions used by every routine in this file:
 *
 *	%rdi       = pointer to the atomic variable
 *	%esi/%rsi  = operand value (where the operation takes one)
 *	%eax/%rax  = return value (_nv, swap, and cas variants)
 *
 * LOCK is a macro supplied by <machine/asm.h> that emits the x86
 * `lock' prefix.  (NOTE(review): it may be a patchable macro rather
 * than a bare prefix -- confirm against machine/asm.h.)
 */

/* 32-bit */

/* void _atomic_add_32(volatile uint32_t *p, uint32_t v): *p += v */
ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

/* uint32_t _atomic_add_32_nv(volatile uint32_t *p, uint32_t v): return *p += v */
ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)	/* %eax = old value */
	addl	%esi, %eax	/* old + v = new value */
	ret
END(_atomic_add_32_nv)

/* void _atomic_and_32(volatile uint32_t *p, uint32_t v): *p &= v */
ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

/* uint32_t _atomic_and_32_nv(volatile uint32_t *p, uint32_t v): return *p &= v */
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax	/* %eax = expected old value */
1:
	movl	%eax, %ecx
	andl	%esi, %ecx	/* %ecx = proposed new value */
	LOCK
	cmpxchgl %ecx, (%rdi)	/* on failure, %eax = fresh old value */
	jnz	1b
	movl	%ecx, %eax	/* success: return the new value */
	ret
END(_atomic_and_32_nv)

/* void _atomic_dec_32(volatile uint32_t *p): --*p */
ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

/* uint32_t _atomic_dec_32_nv(volatile uint32_t *p): return --*p */
ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)	/* %eax = old value */
	decl	%eax		/* old - 1 = new value */
	ret
END(_atomic_dec_32_nv)

/* void _atomic_inc_32(volatile uint32_t *p): ++*p */
ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

/* uint32_t _atomic_inc_32_nv(volatile uint32_t *p): return ++*p */
ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)	/* %eax = old value */
	incl	%eax		/* old + 1 = new value */
	ret
END(_atomic_inc_32_nv)

/* void _atomic_or_32(volatile uint32_t *p, uint32_t v): *p |= v */
ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

/* uint32_t _atomic_or_32_nv(volatile uint32_t *p, uint32_t v): return *p |= v */
ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax	/* %eax = expected old value */
1:
	movl	%eax, %ecx
	orl	%esi, %ecx	/* %ecx = proposed new value */
	LOCK
	cmpxchgl %ecx, (%rdi)	/* on failure, %eax = fresh old value */
	jnz	1b
	movl	%ecx, %eax	/* success: return the new value */
	ret
END(_atomic_or_32_nv)

/* uint32_t _atomic_swap_32(volatile uint32_t *p, uint32_t v): old = *p; *p = v; return old */
ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)	/* xchg with memory asserts LOCK implicitly */
	ret
END(_atomic_swap_32)

/*
 * uint32_t _atomic_cas_32(volatile uint32_t *p, uint32_t expected,
 *     uint32_t new):
 *	atomically { old = *p; if (old == expected) *p = new; }
 *	return old;
 */
ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

/*
 * Same as _atomic_cas_32 but without the LOCK prefix: the plain
 * cmpxchgl is atomic only on the executing CPU (e.g. against
 * interrupts), not against other CPUs.
 */
ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */

/* void _atomic_add_64(volatile uint64_t *p, uint64_t v): *p += v */
ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

/* uint64_t _atomic_add_64_nv(volatile uint64_t *p, uint64_t v): return *p += v */
ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)	/* %rax = old value */
	addq	%rsi, %rax	/* old + v = new value */
	ret
END(_atomic_add_64_nv)

/* void _atomic_and_64(volatile uint64_t *p, uint64_t v): *p &= v */
ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

/* uint64_t _atomic_and_64_nv(volatile uint64_t *p, uint64_t v): return *p &= v */
ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax	/* %rax = expected old value */
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx	/* %rcx = proposed new value */
	LOCK
	cmpxchgq %rcx, (%rdi)	/* on failure, %rax = fresh old value */
	jnz	1b
	movq	%rcx, %rax	/* success: return the new value */
	ret
END(_atomic_and_64_nv)

/* void _atomic_dec_64(volatile uint64_t *p): --*p */
ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

/* uint64_t _atomic_dec_64_nv(volatile uint64_t *p): return --*p */
ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)	/* %rax = old value */
	decq	%rax		/* old - 1 = new value */
	ret
END(_atomic_dec_64_nv)

/* void _atomic_inc_64(volatile uint64_t *p): ++*p */
ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

/* uint64_t _atomic_inc_64_nv(volatile uint64_t *p): return ++*p */
ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)	/* %rax = old value */
	incq	%rax		/* old + 1 = new value */
	ret
END(_atomic_inc_64_nv)

/* void _atomic_or_64(volatile uint64_t *p, uint64_t v): *p |= v */
ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

/* uint64_t _atomic_or_64_nv(volatile uint64_t *p, uint64_t v): return *p |= v */
ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax	/* %rax = expected old value */
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx	/* %rcx = proposed new value */
	LOCK
	cmpxchgq %rcx, (%rdi)	/* on failure, %rax = fresh old value */
	jnz	1b
	movq	%rcx, %rax	/* success: return the new value */
	ret
END(_atomic_or_64_nv)

/* uint64_t _atomic_swap_64(volatile uint64_t *p, uint64_t v): old = *p; *p = v; return old */
ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)	/* xchg with memory asserts LOCK implicitly */
	ret
END(_atomic_swap_64)

/*
 * uint64_t _atomic_cas_64(volatile uint64_t *p, uint64_t expected,
 *     uint64_t new):
 *	atomically { old = *p; if (old == expected) *p = new; }
 *	return old;
 */
ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

/*
 * Same as _atomic_cas_64 but without the LOCK prefix: the plain
 * cmpxchgq is atomic only on the executing CPU (e.g. against
 * interrupts), not against other CPUs.
 */
ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)

/*
 * Public (unprefixed) names.  On LP64 amd64, int is 32 bits and
 * long and pointers are 64 bits, so the type-named variants map onto
 * the sized implementations.
 */
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

/* Current membar interface names. */
ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

/*
 * Legacy membar names, expressed in terms of the current
 * acquire/release/sync implementations.
 * NOTE(review): membar_sync is aliased a second time here,
 * duplicating the alias in the group above; the duplicate is
 * harmless but looks removable.
 */
ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

/*
 * Underscore-prefixed type-named variants are always strong aliases,
 * in both kernel and userland.
 */
STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

/* Legacy underscore-prefixed membar names. */
STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)