1/* $NetBSD: atomic.S,v 1.36 2022/07/30 14:11:00 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/param.h> 33#include <machine/asm.h> 34/* 35 * __HAVE_ constants should not be in <machine/types.h> 36 * because we can't use them from assembly. OTOH we 37 * only need __HAVE_ATOMIC64_OPS here, and we don't. 
 */

/*
 * ALIAS(f, t): export f as an alias for t.  In the kernel the alias must
 * be strong (there is exactly one implementation); in userland it is weak
 * so that consumers may override it.
 */
#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include "opt_xen.h"
#include <machine/frameasm.h>
/*
 * In the kernel proper, LOCK is a hotpatch site: on uniprocessor boots
 * the 1-byte lock prefix is patched out (HP_NAME_NOLOCK).  Similarly,
 * _atomic_cas_64 below is a 49-byte hotpatch window (HP_NAME_CAS_64)
 * that is overwritten with _atomic_cas_cx8 on CPUs that have CMPXCHG8B.
 */
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#define	HOTPATCH_CAS_64	HOTPATCH(HP_NAME_CAS_64, 49);
#else
#define	LOCK		lock
#define	HOTPATCH_CAS_64	/* nothing */
#endif

	.text

/*
 * void _atomic_add_32(volatile uint32_t *p, uint32_t v)
 *
 *	Atomically add v to *p.  Clobbers %eax, %edx, flags.
 */
ENTRY(_atomic_add_32)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = v */
	LOCK
	addl	%eax, (%edx)		/* *p += v, atomically */
	ret
END(_atomic_add_32)

/*
 * uint32_t _atomic_add_32_nv(volatile uint32_t *p, uint32_t v)
 *
 *	Atomically add v to *p; return the new value of *p.
 */
ENTRY(_atomic_add_32_nv)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = v */
	movl	%eax, %ecx		/* save v; xadd overwrites eax */
	LOCK
	xaddl	%eax, (%edx)		/* eax = old *p; *p += v */
	addl	%ecx, %eax		/* return old + v = new value */
	ret
END(_atomic_add_32_nv)

/*
 * void _atomic_and_32(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p &= mask.
 */
ENTRY(_atomic_and_32)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = mask */
	LOCK
	andl	%eax, (%edx)		/* *p &= mask, atomically */
	ret
END(_atomic_and_32)

/*
 * uint32_t _atomic_and_32_nv(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p &= mask; return the new value.  There is no
 *	fetch-and-and instruction, so loop on CMPXCHG until it sticks.
 */
ENTRY(_atomic_and_32_nv)
	movl	4(%esp), %edx		/* edx = p */
	movl	(%edx), %eax		/* eax = expected old value */
0:
	movl	%eax, %ecx
	andl	8(%esp), %ecx		/* ecx = old & mask */
	LOCK
	cmpxchgl %ecx, (%edx)		/* if *p == eax: *p = ecx; else eax = *p */
	jnz	1f			/* lost a race: retry with fresh eax */
	movl	%ecx, %eax		/* return the new value */
	ret
1:
	jmp	0b
END(_atomic_and_32_nv)

/*
 * void _atomic_dec_32(volatile uint32_t *p)
 *
 *	Atomically decrement *p.
 */
ENTRY(_atomic_dec_32)
	movl	4(%esp), %edx		/* edx = p */
	LOCK
	decl	(%edx)			/* (*p)--, atomically */
	ret
END(_atomic_dec_32)

/*
 * uint32_t _atomic_dec_32_nv(volatile uint32_t *p)
 *
 *	Atomically decrement *p; return the new value.
 */
ENTRY(_atomic_dec_32_nv)
	movl	4(%esp), %edx		/* edx = p */
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%edx)		/* eax = old *p; *p -= 1 */
	decl	%eax			/* return old - 1 = new value */
	ret
END(_atomic_dec_32_nv)

/*
 * void _atomic_inc_32(volatile uint32_t *p)
 *
 *	Atomically increment *p.
 */
ENTRY(_atomic_inc_32)
	movl	4(%esp), %edx		/* edx = p */
	LOCK
	incl	(%edx)			/* (*p)++, atomically */
	ret
END(_atomic_inc_32)

/*
 * uint32_t _atomic_inc_32_nv(volatile uint32_t *p)
 *
 *	Atomically increment *p; return the new value.
 */
ENTRY(_atomic_inc_32_nv)
	movl	4(%esp), %edx		/* edx = p */
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%edx)		/* eax = old *p; *p += 1 */
	incl	%eax			/* return old + 1 = new value */
	ret
END(_atomic_inc_32_nv)

/*
 * void _atomic_or_32(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p |= mask.
 */
ENTRY(_atomic_or_32)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = mask */
	LOCK
	orl	%eax, (%edx)		/* *p |= mask, atomically */
	ret
END(_atomic_or_32)

/*
 * uint32_t _atomic_or_32_nv(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p |= mask; return the new value.  CMPXCHG loop,
 *	same structure as _atomic_and_32_nv above.
 */
ENTRY(_atomic_or_32_nv)
	movl	4(%esp), %edx		/* edx = p */
	movl	(%edx), %eax		/* eax = expected old value */
0:
	movl	%eax, %ecx
	orl	8(%esp), %ecx		/* ecx = old | mask */
	LOCK
	cmpxchgl %ecx, (%edx)		/* if *p == eax: *p = ecx; else eax = *p */
	jnz	1f			/* lost a race: retry */
	movl	%ecx, %eax		/* return the new value */
	ret
1:
	jmp	0b
END(_atomic_or_32_nv)

/*
 * uint32_t _atomic_swap_32(volatile uint32_t *p, uint32_t v)
 *
 *	Atomically store v in *p; return the old value.  XCHG with a
 *	memory operand is implicitly locked, so no LOCK prefix is needed.
 */
ENTRY(_atomic_swap_32)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = v */
	xchgl	%eax, (%edx)		/* swap eax <-> *p */
	ret
END(_atomic_swap_32)

/*
 * uint32_t _atomic_cas_32(volatile uint32_t *p, uint32_t old, uint32_t new)
 *
 *	Atomically: if *p == old, set *p = new.  Return the previous
 *	value of *p (== old on success).
 */
ENTRY(_atomic_cas_32)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = expected old value */
	movl	12(%esp), %ecx		/* ecx = new value */
	LOCK
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

/*
 * uint32_t _atomic_cas_32_ni(volatile uint32_t *p, uint32_t old, uint32_t new)
 *
 *	Non-interlocked (_ni) variant: same operation, but without the
 *	LOCK prefix -- atomic with respect to the local CPU only.
 */
ENTRY(_atomic_cas_32_ni)
	movl	4(%esp), %edx		/* edx = p */
	movl	8(%esp), %eax		/* eax = expected old value */
	movl	12(%esp), %ecx		/* ecx = new value */
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDL,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDL is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 */
	LOCK
	addl	$0, -4(%esp)
	ret
END(_membar_sync)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
#ifdef XENPV
STRONG_ALIAS(_atomic_cas_64,_atomic_cas_cx8)
#else
/*
 * uint64_t _atomic_cas_64(volatile uint64_t *p, uint64_t old, uint64_t new)
 *
 *	64-bit compare-and-swap for CPUs without CMPXCHG8B (i486).
 *	In the kernel the sequence is made atomic by blocking interrupts
 *	(pushf/cli) for its duration; on CMPXCHG8B-capable CPUs the whole
 *	49-byte body is hotpatched with _atomic_cas_cx8 below.
 *	NOTE(review): outside _HARDKERNEL no interrupts are blocked here,
 *	so this path presumably is only reachable where that is safe --
 *	confirm against the userland consumers.
 */
ENTRY(_atomic_cas_64)
	HOTPATCH_CAS_64
	/* 49 bytes of instructions */
#ifdef _HARDKERNEL
	pushf
	cli			/* block interrupts: pseudo-atomic on UP */
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi	/* edi = p (args shifted by the 2 pushes) */
	movl	16(%esp), %eax	/* edx:eax = expected old value */
	movl	20(%esp), %edx
	movl	24(%esp), %ebx	/* ecx:ebx = new value */
	movl	28(%esp), %ecx
	cmpl	0(%edi), %eax	/* compare both halves against *p */
	jne	2f
	cmpl	4(%edi), %edx
	jne	2f
	movl	%ebx, 0(%edi)	/* match: store new value */
	movl	%ecx, 4(%edi)
1:
	popl	%ebx
	popl	%edi
#ifdef _HARDKERNEL
	popf			/* restore interrupt state */
#endif
	ret
2:
	movl	0(%edi), %eax	/* mismatch: return current *p */
	movl	4(%edi), %edx
	jmp	1b
END(_atomic_cas_64)
#endif /* !XENPV */

/*
 * uint64_t _atomic_cas_cx8(volatile uint64_t *p, uint64_t old, uint64_t new)
 *
 *	64-bit compare-and-swap using CMPXCHG8B.  Must occupy exactly the
 *	same 49 bytes as the _atomic_cas_64 hotpatch window above; the
 *	trailing 0xCC (int3) padding in the kernel makes up the difference.
 */
ENTRY(_atomic_cas_cx8)
	/* 49 bytes of instructions */
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi	/* edi = p */
	movl	16(%esp), %eax	/* edx:eax = expected old value */
	movl	20(%esp), %edx
	movl	24(%esp), %ebx	/* ecx:ebx = new value */
	movl	28(%esp), %ecx
	LOCK
	cmpxchg8b (%edi)	/* edx:eax = previous value of *p */
	popl	%ebx
	popl	%edi
	ret
#ifdef _HARDKERNEL
	.space	20, 0xCC	/* pad to the 49-byte hotpatch size */
#endif
END(_atomic_cas_cx8)
LABEL(_atomic_cas_cx8_end)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_32)
ALIAS(atomic_add_ptr,_atomic_add_32)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_32_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_32_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_32)
ALIAS(atomic_and_ptr,_atomic_and_32)

/*
 * Exported names.  ALIAS() is strong in the kernel and weak in userland
 * (see the definition at the top of the file).  int/uint/long/ulong/ptr
 * variants all map onto the 32-bit entry points on i386.
 */
ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_32_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_32)
ALIAS(atomic_dec_ptr,_atomic_dec_32)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_32_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_32)
ALIAS(atomic_inc_ptr,_atomic_inc_32)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_32_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_32)
ALIAS(atomic_or_ptr,_atomic_or_32)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_32_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_32)
ALIAS(atomic_swap_ptr,_atomic_swap_32)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_32)
ALIAS(atomic_cas_ptr,_atomic_cas_32)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_32_ni)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_64_ni,_atomic_cas_64)
ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

/* C11-style memory barrier names. */
ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

/*
 * Legacy membar names, expressed in terms of the modern ones.
 * (A second ALIAS(membar_sync,_membar_sync) used to follow here; it was
 * a duplicate of the definition above and has been removed -- with
 * STRONG_ALIAS expansion in the kernel it defined the symbol twice.)
 */
ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)

/* Strong internal aliases for the C-level sized names. */
STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_32)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_32)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_32_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_32)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_32_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_32)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_32_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_32)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_32_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_32)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_32_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_32)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_32)

/* Non-interlocked (_ni) CAS variants share the unlocked 32-bit entry. */
STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni)

/* Legacy internal membar names, mapped onto the modern entries. */
STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)