/*	$NetBSD: atomic.S,v 1.29 2022/07/30 14:11:00 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif
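
/*
 * In the kernel the public names below are strong aliases of these
 * implementations; in userland they are weak, so that another object
 * (e.g. a thread library) may interpose its own definitions.
 */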

	.text

/* 32-bit */
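
/*
 * SysV AMD64 calling convention: the target address arrives in %rdi,
 * the operand (or the expected value for CAS) in %esi, the new value
 * for CAS in %edx, and the result is returned in %eax.  The _nv
 * variants return the resulting ("new") value of the target word.
 */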

ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)	/* %eax <- previous value of (%rdi) */
	addl	%esi, %eax	/* previous value + delta == new value */
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

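/*
 * There is no fetch-and-AND or fetch-and-OR instruction, so the _nv
 * variants of AND and OR are built from a LOCK CMPXCHG retry loop:
 * compute the new value from a snapshot of the old one, and retry if
 * another CPU changed the word in the meantime.
 */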
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)	/* store %ecx iff (%rdi) still equals %eax */
	jnz	1b		/* raced: %eax has been reloaded, retry */
	movl	%ecx, %eax	/* return the new value */
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)	/* xchg with a memory operand implies LOCK */
	ret
END(_atomic_swap_32)

ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

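/*
 * The CAS primitives return the value found in memory before the
 * operation, so callers typically retry until that value matches the
 * one they expected.  A rough illustration in C (hypothetical caller,
 * not part of this file):
 *
 *	uint32_t old, new;
 *	do {
 *		old = *p;
 *		new = old | flag;
 *	} while (atomic_cas_32(p, old, new) != old);
 */

/*
 * The _ni ("not interlocked") variants omit the LOCK prefix.  A single
 * CMPXCHG is still atomic with respect to interrupts on the issuing
 * CPU, so they suffice where only same-CPU consumers need to be
 * excluded, but they provide no atomicity against other CPUs.
 */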
ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */
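
/*
 * The 64-bit variants mirror the 32-bit ones: the address is in %rdi,
 * the operand (or expected value) in %rsi, the new value for CAS in
 * %rdx, and the result is returned in %rax.
 */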

ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */
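
/*
 * x86 provides a strongly ordered (TSO-like) model for normal memory:
 * loads are not reordered with other loads, stores are not reordered
 * with other stores, and stores are not reordered with earlier loads.
 * The only reordering a program can observe is a load completing ahead
 * of an earlier store (store-before-load), due to store buffering, so
 * only membar_sync has to emit an instruction.
 */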

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)
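
/*
 * For illustration, the classic case that needs the full barrier is a
 * store-before-load handshake between two CPUs (hypothetical flags,
 * not part of this file):
 *
 *	CPU 0				CPU 1
 *	flag0 = 1;			flag1 = 1;
 *	membar_sync();			membar_sync();
 *	read flag1			read flag0
 *
 * Without the barriers, store buffering would allow both CPUs to
 * observe the other's flag as still 0.
 */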

ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)