1/*-
2 * Copyright (c) 2015-2024 Ruslan Bukin <br@bsdpad.com>
3 * All rights reserved.
4 *
5 * Portions of this software were developed by SRI International and the
6 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
7 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
8 *
9 * Portions of this software were developed by the University of Cambridge
10 * Computer Laboratory as part of the CTSRD Project, with support from the
11 * UK Higher Education Innovation Fund (HEIF).
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef	_MACHINE_ATOMIC_H_
36#define	_MACHINE_ATOMIC_H_
37
38#include <sys/atomic_common.h>
39
40#define	fence()	__asm __volatile("fence" ::: "memory");
41#define	mb()	fence()
42#define	rmb()	fence()
43#define	wmb()	fence()
44
/*
 * Forward declarations of the 8- and 16-bit compare-and-set primitives.
 * No definition of them appears before the acquire/release wrappers that
 * are generated for these widths further down, so they are declared here.
 */
static __inline int atomic_cmpset_8(__volatile uint8_t *p, uint8_t cmpval,
    uint8_t newval);
static __inline int atomic_fcmpset_8(__volatile uint8_t *p, uint8_t *cmpval,
    uint8_t newval);
static __inline int atomic_cmpset_16(__volatile uint16_t *p, uint16_t cmpval,
    uint16_t newval);
static __inline int atomic_fcmpset_16(__volatile uint16_t *p,
    uint16_t *cmpval, uint16_t newval);
50
/*
 * ATOMIC_ACQ_REL(NAME, WIDTH) generates two wrappers around the void
 * operation atomic_<NAME>_<WIDTH>():
 *   atomic_<NAME>_acq_<WIDTH>()  performs the operation, then a fence;
 *   atomic_<NAME>_rel_<WIDTH>()  issues a fence, then performs the operation.
 */
#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *dst,		\
    uint##WIDTH##_t val)						\
{									\
	atomic_##NAME##_##WIDTH(dst, val);				\
	fence();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *dst,		\
    uint##WIDTH##_t val)						\
{									\
	fence();							\
	atomic_##NAME##_##WIDTH(dst, val);				\
}
65
/*
 * ATOMIC_CMPSET_ACQ_REL(WIDTH) generates the acquire and release flavours
 * of atomic_cmpset_<WIDTH>().  Both return whatever the underlying cmpset
 * returns; the acquire flavour fences after the operation, the release
 * flavour fences before it.
 */
#define	ATOMIC_CMPSET_ACQ_REL(WIDTH)					\
static __inline int							\
atomic_cmpset_acq_##WIDTH(__volatile uint##WIDTH##_t *dst,		\
    uint##WIDTH##_t expect, uint##WIDTH##_t desired)			\
{									\
	int ok = atomic_cmpset_##WIDTH(dst, expect, desired);		\
									\
	fence();							\
	return (ok);							\
}									\
									\
static __inline int							\
atomic_cmpset_rel_##WIDTH(__volatile uint##WIDTH##_t *dst,		\
    uint##WIDTH##_t expect, uint##WIDTH##_t desired)			\
{									\
	int ok;								\
									\
	fence();							\
	ok = atomic_cmpset_##WIDTH(dst, expect, desired);		\
	return (ok);							\
}
85
/*
 * ATOMIC_FCMPSET_ACQ_REL(WIDTH) generates the acquire and release flavours
 * of atomic_fcmpset_<WIDTH>().  The expected value is passed by pointer and
 * handed through unchanged; the result of the underlying fcmpset is
 * returned.  Acquire fences after the operation, release fences before it.
 */
#define	ATOMIC_FCMPSET_ACQ_REL(WIDTH)					\
static __inline int							\
atomic_fcmpset_acq_##WIDTH(__volatile uint##WIDTH##_t *dst,		\
    uint##WIDTH##_t *expect, uint##WIDTH##_t desired)			\
{									\
	int ok = atomic_fcmpset_##WIDTH(dst, expect, desired);		\
									\
	fence();							\
	return (ok);							\
}									\
									\
static __inline int							\
atomic_fcmpset_rel_##WIDTH(__volatile uint##WIDTH##_t *dst,		\
    uint##WIDTH##_t *expect, uint##WIDTH##_t desired)			\
{									\
	int ok;								\
									\
	fence();							\
	ok = atomic_fcmpset_##WIDTH(dst, expect, desired);		\
	return (ok);							\
}
105
106ATOMIC_CMPSET_ACQ_REL(8);
107ATOMIC_FCMPSET_ACQ_REL(8);
108
109#define	atomic_cmpset_char		atomic_cmpset_8
110#define	atomic_cmpset_acq_char		atomic_cmpset_acq_8
111#define	atomic_cmpset_rel_char		atomic_cmpset_rel_8
112#define	atomic_fcmpset_char		atomic_fcmpset_8
113#define	atomic_fcmpset_acq_char		atomic_fcmpset_acq_8
114#define	atomic_fcmpset_rel_char		atomic_fcmpset_rel_8
115
116#define	atomic_cmpset_short		atomic_cmpset_16
117#define	atomic_fcmpset_short		atomic_fcmpset_16
118
119ATOMIC_CMPSET_ACQ_REL(16);
120ATOMIC_FCMPSET_ACQ_REL(16);
121
/*
 * The self-referential define makes atomic_load_acq_16 visible to #ifdef
 * tests.  NOTE(review): presumably <sys/_atomic_subword.h>, included at the
 * end of this header, checks it before supplying a generic version --
 * confirm against that header.
 */
#define	atomic_load_acq_16	atomic_load_acq_16

/*
 * Load a 16-bit value with acquire ordering: read *p, then fence.
 * Returns the value that was read.
 */
static __inline uint16_t
atomic_load_acq_16(volatile uint16_t *p)
{
	const uint16_t loaded = *p;

	fence();
	return (loaded);
}
134
/*
 * Store a 16-bit value with release ordering: fence first, then write
 * val to *p.
 */
static __inline void
atomic_store_rel_16(volatile uint16_t *p, uint16_t val)
{
	fence();
	*p = val;
}
143
/* Acquire-ordered 'short' names map onto the 16-bit operations. */
#define	atomic_cmpset_acq_short		atomic_cmpset_acq_16
#define	atomic_fcmpset_acq_short	atomic_fcmpset_acq_16
#define	atomic_load_acq_short		atomic_load_acq_16

/* Release-ordered 'short' names map onto the 16-bit operations. */
#define	atomic_cmpset_rel_short		atomic_cmpset_rel_16
#define	atomic_fcmpset_rel_short	atomic_fcmpset_rel_16
#define	atomic_store_rel_short		atomic_store_rel_16
151
152static __inline void
153atomic_add_32(volatile uint32_t *p, uint32_t val)
154{
155
156	__asm __volatile("amoadd.w zero, %1, %0"
157			: "+A" (*p)
158			: "r" (val)
159			: "memory");
160}
161
162static __inline void
163atomic_subtract_32(volatile uint32_t *p, uint32_t val)
164{
165
166	__asm __volatile("amoadd.w zero, %1, %0"
167			: "+A" (*p)
168			: "r" (-val)
169			: "memory");
170}
171
172static __inline void
173atomic_set_32(volatile uint32_t *p, uint32_t val)
174{
175
176	__asm __volatile("amoor.w zero, %1, %0"
177			: "+A" (*p)
178			: "r" (val)
179			: "memory");
180}
181
182static __inline void
183atomic_clear_32(volatile uint32_t *p, uint32_t val)
184{
185
186	__asm __volatile("amoand.w zero, %1, %0"
187			: "+A" (*p)
188			: "r" (~val)
189			: "memory");
190}
191
192static __inline int
193atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
194{
195	uint32_t tmp;
196	int res;
197
198	res = 0;
199
200	__asm __volatile(
201		"0:"
202			"li   %1, 1\n" /* Preset to fail */
203			"lr.w %0, %2\n"
204			"bne  %0, %z3, 1f\n"
205			"sc.w %1, %z4, %2\n"
206			"bnez %1, 0b\n"
207		"1:"
208			: "=&r" (tmp), "=&r" (res), "+A" (*p)
209			: "rJ" ((long)(int32_t)cmpval), "rJ" (newval)
210			: "memory");
211
212	return (!res);
213}
214
215static __inline int
216atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
217{
218	uint32_t tmp;
219	int res;
220
221	res = 0;
222
223	__asm __volatile(
224		"0:"
225			"li   %1, 1\n"		/* Preset to fail */
226			"lr.w %0, %2\n"		/* Load old value */
227			"bne  %0, %z4, 1f\n"	/* Compare */
228			"sc.w %1, %z5, %2\n"	/* Try to store new value */
229			"j 2f\n"
230		"1:"
231			"sw   %0, %3\n"		/* Save old value */
232		"2:"
233			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
234			: "rJ" ((long)(int32_t)*cmpval), "rJ" (newval)
235			: "memory");
236
237	return (!res);
238}
239
240static __inline uint32_t
241atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
242{
243	uint32_t ret;
244
245	__asm __volatile("amoadd.w %0, %2, %1"
246			: "=&r" (ret), "+A" (*p)
247			: "r" (val)
248			: "memory");
249
250	return (ret);
251}
252
253static __inline uint32_t
254atomic_readandclear_32(volatile uint32_t *p)
255{
256	uint32_t ret;
257	uint32_t val;
258
259	val = 0;
260
261	__asm __volatile("amoswap.w %0, %2, %1"
262			: "=&r"(ret), "+A" (*p)
263			: "r" (val)
264			: "memory");
265
266	return (ret);
267}
268
/* 'int' names map onto the 32-bit operations. */
#define	atomic_add_int		atomic_add_32
#define	atomic_clear_int	atomic_clear_32
#define	atomic_cmpset_int	atomic_cmpset_32
#define	atomic_fcmpset_int	atomic_fcmpset_32
#define	atomic_fetchadd_int	atomic_fetchadd_32
#define	atomic_readandclear_int	atomic_readandclear_32
#define	atomic_set_int		atomic_set_32
#define	atomic_subtract_int	atomic_subtract_32
277
278ATOMIC_ACQ_REL(set, 32)
279ATOMIC_ACQ_REL(clear, 32)
280ATOMIC_ACQ_REL(add, 32)
281ATOMIC_ACQ_REL(subtract, 32)
282
283ATOMIC_CMPSET_ACQ_REL(32);
284ATOMIC_FCMPSET_ACQ_REL(32);
285
/*
 * Load a 32-bit value with acquire ordering: read *p, then fence.
 * Returns the value that was read.
 */
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	const uint32_t loaded = *p;

	fence();
	return (loaded);
}
297
/*
 * Store a 32-bit value with release ordering: fence first, then write
 * val to *p.
 */
static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{
	fence();
	*p = val;
}
306
/* Acquire-ordered 'int' names map onto the 32-bit operations. */
#define	atomic_add_acq_int	atomic_add_acq_32
#define	atomic_clear_acq_int	atomic_clear_acq_32
#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
#define	atomic_fcmpset_acq_int	atomic_fcmpset_acq_32
#define	atomic_load_acq_int	atomic_load_acq_32
#define	atomic_set_acq_int	atomic_set_acq_32
#define	atomic_subtract_acq_int	atomic_subtract_acq_32

/* Release-ordered 'int' names map onto the 32-bit operations. */
#define	atomic_add_rel_int	atomic_add_rel_32
#define	atomic_clear_rel_int	atomic_clear_rel_32
#define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
#define	atomic_fcmpset_rel_int	atomic_fcmpset_rel_32
#define	atomic_set_rel_int	atomic_set_rel_32
#define	atomic_subtract_rel_int	atomic_subtract_rel_32
#define	atomic_store_rel_int	atomic_store_rel_32
322
323static __inline void
324atomic_add_64(volatile uint64_t *p, uint64_t val)
325{
326
327	__asm __volatile("amoadd.d zero, %1, %0"
328			: "+A" (*p)
329			: "r" (val)
330			: "memory");
331}
332
333static __inline void
334atomic_subtract_64(volatile uint64_t *p, uint64_t val)
335{
336
337	__asm __volatile("amoadd.d zero, %1, %0"
338			: "+A" (*p)
339			: "r" (-val)
340			: "memory");
341}
342
343static __inline void
344atomic_set_64(volatile uint64_t *p, uint64_t val)
345{
346
347	__asm __volatile("amoor.d zero, %1, %0"
348			: "+A" (*p)
349			: "r" (val)
350			: "memory");
351}
352
353static __inline void
354atomic_clear_64(volatile uint64_t *p, uint64_t val)
355{
356
357	__asm __volatile("amoand.d zero, %1, %0"
358			: "+A" (*p)
359			: "r" (~val)
360			: "memory");
361}
362
363static __inline int
364atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
365{
366	uint64_t tmp;
367	int res;
368
369	res = 0;
370
371	__asm __volatile(
372		"0:"
373			"li   %1, 1\n" /* Preset to fail */
374			"lr.d %0, %2\n"
375			"bne  %0, %z3, 1f\n"
376			"sc.d %1, %z4, %2\n"
377			"bnez %1, 0b\n"
378		"1:"
379			: "=&r" (tmp), "=&r" (res), "+A" (*p)
380			: "rJ" (cmpval), "rJ" (newval)
381			: "memory");
382
383	return (!res);
384}
385
386static __inline int
387atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
388{
389	uint64_t tmp;
390	int res;
391
392	res = 0;
393
394	__asm __volatile(
395		"0:"
396			"li   %1, 1\n"		/* Preset to fail */
397			"lr.d %0, %2\n"		/* Load old value */
398			"bne  %0, %z4, 1f\n"	/* Compare */
399			"sc.d %1, %z5, %2\n"	/* Try to store new value */
400			"j 2f\n"
401		"1:"
402			"sd   %0, %3\n"		/* Save old value */
403		"2:"
404			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
405			: "rJ" (*cmpval), "rJ" (newval)
406			: "memory");
407
408	return (!res);
409}
410
411static __inline uint64_t
412atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
413{
414	uint64_t ret;
415
416	__asm __volatile("amoadd.d %0, %2, %1"
417			: "=&r" (ret), "+A" (*p)
418			: "r" (val)
419			: "memory");
420
421	return (ret);
422}
423
424static __inline uint64_t
425atomic_readandclear_64(volatile uint64_t *p)
426{
427	uint64_t ret;
428	uint64_t val;
429
430	val = 0;
431
432	__asm __volatile("amoswap.d %0, %2, %1"
433			: "=&r"(ret), "+A" (*p)
434			: "r" (val)
435			: "memory");
436
437	return (ret);
438}
439
440static __inline uint32_t
441atomic_swap_32(volatile uint32_t *p, uint32_t val)
442{
443	uint32_t old;
444
445	__asm __volatile("amoswap.w %0, %2, %1"
446			: "=&r"(old), "+A" (*p)
447			: "r" (val)
448			: "memory");
449
450	return (old);
451}
452
453static __inline uint64_t
454atomic_swap_64(volatile uint64_t *p, uint64_t val)
455{
456	uint64_t old;
457
458	__asm __volatile("amoswap.d %0, %2, %1"
459			: "=&r"(old), "+A" (*p)
460			: "r" (val)
461			: "memory");
462
463	return (old);
464}
465
/* 'int' swap maps onto the 32-bit operation. */
#define	atomic_swap_int			atomic_swap_32

/* 'long' names map onto the 64-bit operations. */
#define	atomic_add_long			atomic_add_64
#define	atomic_clear_long		atomic_clear_64
#define	atomic_cmpset_long		atomic_cmpset_64
#define	atomic_fcmpset_long		atomic_fcmpset_64
#define	atomic_fetchadd_long		atomic_fetchadd_64
#define	atomic_readandclear_long	atomic_readandclear_64
#define	atomic_set_long			atomic_set_64
#define	atomic_subtract_long		atomic_subtract_64
#define	atomic_swap_long		atomic_swap_64

/* Pointer-sized names likewise map onto the 64-bit operations. */
#define	atomic_add_ptr			atomic_add_64
#define	atomic_clear_ptr		atomic_clear_64
#define	atomic_cmpset_ptr		atomic_cmpset_64
#define	atomic_fcmpset_ptr		atomic_fcmpset_64
#define	atomic_fetchadd_ptr		atomic_fetchadd_64
#define	atomic_readandclear_ptr		atomic_readandclear_64
#define	atomic_set_ptr			atomic_set_64
#define	atomic_subtract_ptr		atomic_subtract_64
#define	atomic_swap_ptr			atomic_swap_64
487
488ATOMIC_ACQ_REL(set, 64)
489ATOMIC_ACQ_REL(clear, 64)
490ATOMIC_ACQ_REL(add, 64)
491ATOMIC_ACQ_REL(subtract, 64)
492
493ATOMIC_CMPSET_ACQ_REL(64);
494ATOMIC_FCMPSET_ACQ_REL(64);
495
/*
 * Load a 64-bit value with acquire ordering: read *p, then fence.
 * Returns the value that was read.
 */
static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	const uint64_t loaded = *p;

	fence();
	return (loaded);
}
507
/*
 * Store a 64-bit value with release ordering: fence first, then write
 * val to *p.
 */
static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{
	fence();
	*p = val;
}
516
/* Acquire-ordered 'long' names map onto the 64-bit operations. */
#define	atomic_add_acq_long		atomic_add_acq_64
#define	atomic_clear_acq_long		atomic_clear_acq_64
#define	atomic_cmpset_acq_long		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_long		atomic_fcmpset_acq_64
#define	atomic_load_acq_long		atomic_load_acq_64
#define	atomic_set_acq_long		atomic_set_acq_64
#define	atomic_subtract_acq_long	atomic_subtract_acq_64

/* Acquire-ordered pointer-sized names map onto the 64-bit operations. */
#define	atomic_add_acq_ptr		atomic_add_acq_64
#define	atomic_clear_acq_ptr		atomic_clear_acq_64
#define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_64
#define	atomic_load_acq_ptr		atomic_load_acq_64
#define	atomic_set_acq_ptr		atomic_set_acq_64
#define	atomic_subtract_acq_ptr		atomic_subtract_acq_64

/* The generator macro has no further uses below in this header. */
#undef ATOMIC_ACQ_REL
534
/* Acquire thread fence; implemented with the same fence() as the others. */
static __inline void
atomic_thread_fence_acq(void)
{
	fence();
}
541
/* Release thread fence; implemented with the same fence() as the others. */
static __inline void
atomic_thread_fence_rel(void)
{
	fence();
}
548
/*
 * Acquire-release thread fence; implemented with the same fence() as the
 * others.
 */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	fence();
}
555
/*
 * Sequentially-consistent thread fence; implemented with the same fence()
 * as the others.
 */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	fence();
}
562
/*
 * Release-ordered 'long' names map onto the 64-bit operations.  Each alias
 * is defined exactly once.
 */
#define	atomic_add_rel_long		atomic_add_rel_64
#define	atomic_clear_rel_long		atomic_clear_rel_64
#define	atomic_cmpset_rel_long		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_long		atomic_fcmpset_rel_64
#define	atomic_set_rel_long		atomic_set_rel_64
#define	atomic_subtract_rel_long	atomic_subtract_rel_64
#define	atomic_store_rel_long		atomic_store_rel_64
573
/* Release-ordered pointer-sized names map onto the 64-bit operations. */
#define	atomic_add_rel_ptr		atomic_add_rel_64
#define	atomic_clear_rel_ptr		atomic_clear_rel_64
#define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_64
#define	atomic_set_rel_ptr		atomic_set_rel_64
#define	atomic_subtract_rel_ptr		atomic_subtract_rel_64
#define	atomic_store_rel_ptr		atomic_store_rel_64
581
582#include <sys/_atomic_subword.h>
583
584#endif /* _MACHINE_ATOMIC_H_ */
585