1/*-
2 * Copyright (c) 2015 Ruslan Bukin <br@bsdpad.com>
3 * All rights reserved.
4 *
5 * Portions of this software were developed by SRI International and the
6 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
7 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
8 *
9 * Portions of this software were developed by the University of Cambridge
10 * Computer Laboratory as part of the CTSRD Project, with support from the
11 * UK Higher Education Innovation Fund (HEIF).
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $FreeBSD$
35 */
36
37#ifndef	_MACHINE_ATOMIC_H_
38#define	_MACHINE_ATOMIC_H_
39
40#include <sys/atomic_common.h>
41
42#define	fence()	__asm __volatile("fence" ::: "memory");
43#define	mb()	fence()
44#define	rmb()	fence()
45#define	wmb()	fence()
46
/*
 * Forward declarations of the 8- and 16-bit compare-and-set primitives, so
 * that the acquire/release wrappers generated further down can reference
 * them.  NOTE(review): the definitions are presumably supplied by
 * <sys/_atomic_subword.h>, included at the end of this header -- confirm.
 */
static __inline int atomic_cmpset_8(volatile uint8_t *p, uint8_t cmpval,
    uint8_t newval);
static __inline int atomic_fcmpset_8(volatile uint8_t *p, uint8_t *cmpval,
    uint8_t newval);
static __inline int atomic_cmpset_16(volatile uint16_t *p, uint16_t cmpval,
    uint16_t newval);
static __inline int atomic_fcmpset_16(volatile uint16_t *p, uint16_t *cmpval,
    uint16_t newval);
52
/*
 * ATOMIC_ACQ_REL(NAME, WIDTH) generates two wrappers around
 * atomic_<NAME>_<WIDTH>(p, v):
 *
 *   atomic_<NAME>_acq_<WIDTH>()  runs the operation, then fence();
 *   atomic_<NAME>_rel_<WIDTH>()  runs fence(), then the operation.
 */
#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(volatile uint##WIDTH##_t *p,		\
    uint##WIDTH##_t v)							\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	fence();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(volatile uint##WIDTH##_t *p,		\
    uint##WIDTH##_t v)							\
{									\
	fence();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}
67
/*
 * ATOMIC_CMPSET_ACQ_REL(WIDTH) generates the acquire and release flavours
 * of atomic_cmpset_<WIDTH>().  Each returns exactly what the underlying
 * cmpset returned; the acquire form issues fence() after the operation and
 * the release form issues fence() before it.
 */
#define	ATOMIC_CMPSET_ACQ_REL(WIDTH)					\
static __inline int							\
atomic_cmpset_acq_##WIDTH(volatile uint##WIDTH##_t *p,			\
    uint##WIDTH##_t cmpval, uint##WIDTH##_t newval)			\
{									\
	int ok = atomic_cmpset_##WIDTH(p, cmpval, newval);		\
									\
	fence();							\
	return (ok);							\
}									\
									\
static __inline int							\
atomic_cmpset_rel_##WIDTH(volatile uint##WIDTH##_t *p,			\
    uint##WIDTH##_t cmpval, uint##WIDTH##_t newval)			\
{									\
	fence();							\
	return (atomic_cmpset_##WIDTH(p, cmpval, newval));		\
}
87
/*
 * ATOMIC_FCMPSET_ACQ_REL(WIDTH) generates the acquire and release flavours
 * of atomic_fcmpset_<WIDTH>().  Each forwards p, cmpval and newval
 * unchanged and returns the underlying result; the acquire form issues
 * fence() after the operation and the release form issues fence() before it.
 */
#define	ATOMIC_FCMPSET_ACQ_REL(WIDTH)					\
static __inline int							\
atomic_fcmpset_acq_##WIDTH(volatile uint##WIDTH##_t *p,			\
    uint##WIDTH##_t *cmpval, uint##WIDTH##_t newval)			\
{									\
	int ok = atomic_fcmpset_##WIDTH(p, cmpval, newval);		\
									\
	fence();							\
	return (ok);							\
}									\
									\
static __inline int							\
atomic_fcmpset_rel_##WIDTH(volatile uint##WIDTH##_t *p,			\
    uint##WIDTH##_t *cmpval, uint##WIDTH##_t newval)			\
{									\
	fence();							\
	return (atomic_fcmpset_##WIDTH(p, cmpval, newval));		\
}
107
108ATOMIC_CMPSET_ACQ_REL(8);
109ATOMIC_FCMPSET_ACQ_REL(8);
110ATOMIC_CMPSET_ACQ_REL(16);
111ATOMIC_FCMPSET_ACQ_REL(16);
112
/*
 * char and short spellings of the sub-word compare-and-set operations:
 * char maps onto the 8-bit functions, short onto the 16-bit ones.
 */
#define	atomic_cmpset_char		atomic_cmpset_8
#define	atomic_cmpset_short		atomic_cmpset_16
#define	atomic_cmpset_acq_char		atomic_cmpset_acq_8
#define	atomic_cmpset_acq_short		atomic_cmpset_acq_16
#define	atomic_cmpset_rel_char		atomic_cmpset_rel_8
#define	atomic_cmpset_rel_short		atomic_cmpset_rel_16

#define	atomic_fcmpset_char		atomic_fcmpset_8
#define	atomic_fcmpset_short		atomic_fcmpset_16
#define	atomic_fcmpset_acq_char		atomic_fcmpset_acq_8
#define	atomic_fcmpset_acq_short	atomic_fcmpset_acq_16
#define	atomic_fcmpset_rel_char		atomic_fcmpset_rel_8
#define	atomic_fcmpset_rel_short	atomic_fcmpset_rel_16
126
127static __inline void
128atomic_add_32(volatile uint32_t *p, uint32_t val)
129{
130
131	__asm __volatile("amoadd.w zero, %1, %0"
132			: "+A" (*p)
133			: "r" (val)
134			: "memory");
135}
136
137static __inline void
138atomic_subtract_32(volatile uint32_t *p, uint32_t val)
139{
140
141	__asm __volatile("amoadd.w zero, %1, %0"
142			: "+A" (*p)
143			: "r" (-val)
144			: "memory");
145}
146
147static __inline void
148atomic_set_32(volatile uint32_t *p, uint32_t val)
149{
150
151	__asm __volatile("amoor.w zero, %1, %0"
152			: "+A" (*p)
153			: "r" (val)
154			: "memory");
155}
156
157static __inline void
158atomic_clear_32(volatile uint32_t *p, uint32_t val)
159{
160
161	__asm __volatile("amoand.w zero, %1, %0"
162			: "+A" (*p)
163			: "r" (~val)
164			: "memory");
165}
166
167static __inline int
168atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
169{
170	uint32_t tmp;
171	int res;
172
173	res = 0;
174
175	__asm __volatile(
176		"0:"
177			"li   %1, 1\n" /* Preset to fail */
178			"lr.w %0, %2\n"
179			"bne  %0, %z3, 1f\n"
180			"sc.w %1, %z4, %2\n"
181			"bnez %1, 0b\n"
182		"1:"
183			: "=&r" (tmp), "=&r" (res), "+A" (*p)
184			: "rJ" ((long)(int32_t)cmpval), "rJ" (newval)
185			: "memory");
186
187	return (!res);
188}
189
190static __inline int
191atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
192{
193	uint32_t tmp;
194	int res;
195
196	res = 0;
197
198	__asm __volatile(
199		"0:"
200			"li   %1, 1\n"		/* Preset to fail */
201			"lr.w %0, %2\n"		/* Load old value */
202			"bne  %0, %z4, 1f\n"	/* Compare */
203			"sc.w %1, %z5, %2\n"	/* Try to store new value */
204			"j 2f\n"
205		"1:"
206			"sw   %0, %3\n"		/* Save old value */
207		"2:"
208			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
209			: "rJ" ((long)(int32_t)*cmpval), "rJ" (newval)
210			: "memory");
211
212	return (!res);
213}
214
215static __inline uint32_t
216atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
217{
218	uint32_t ret;
219
220	__asm __volatile("amoadd.w %0, %2, %1"
221			: "=&r" (ret), "+A" (*p)
222			: "r" (val)
223			: "memory");
224
225	return (ret);
226}
227
228static __inline uint32_t
229atomic_readandclear_32(volatile uint32_t *p)
230{
231	uint32_t ret;
232	uint32_t val;
233
234	val = 0;
235
236	__asm __volatile("amoswap.w %0, %2, %1"
237			: "=&r"(ret), "+A" (*p)
238			: "r" (val)
239			: "memory");
240
241	return (ret);
242}
243
/* The int spellings are aliases for the 32-bit primitives. */
#define	atomic_add_int		atomic_add_32
#define	atomic_subtract_int	atomic_subtract_32
#define	atomic_set_int		atomic_set_32
#define	atomic_clear_int	atomic_clear_32
#define	atomic_cmpset_int	atomic_cmpset_32
#define	atomic_fcmpset_int	atomic_fcmpset_32
#define	atomic_fetchadd_int	atomic_fetchadd_32
#define	atomic_readandclear_int	atomic_readandclear_32
252
253ATOMIC_ACQ_REL(set, 32)
254ATOMIC_ACQ_REL(clear, 32)
255ATOMIC_ACQ_REL(add, 32)
256ATOMIC_ACQ_REL(subtract, 32)
257
258ATOMIC_CMPSET_ACQ_REL(32);
259ATOMIC_FCMPSET_ACQ_REL(32);
260
/*
 * Load the 32-bit word at p with acquire ordering: a plain volatile load
 * followed by fence().  Returns the loaded value.
 */
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t loaded = *p;

	fence();
	return (loaded);
}
272
/*
 * Store val to the 32-bit word at p with release ordering: fence()
 * followed by a plain volatile store.
 */
static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{
	fence();
	*p = val;
}
281
/* Acquire flavours of the int operations map onto the 32-bit versions. */
#define	atomic_load_acq_int	atomic_load_acq_32
#define	atomic_add_acq_int	atomic_add_acq_32
#define	atomic_subtract_acq_int	atomic_subtract_acq_32
#define	atomic_set_acq_int	atomic_set_acq_32
#define	atomic_clear_acq_int	atomic_clear_acq_32
#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
#define	atomic_fcmpset_acq_int	atomic_fcmpset_acq_32

/* Release flavours of the int operations map onto the 32-bit versions. */
#define	atomic_store_rel_int	atomic_store_rel_32
#define	atomic_add_rel_int	atomic_add_rel_32
#define	atomic_subtract_rel_int	atomic_subtract_rel_32
#define	atomic_set_rel_int	atomic_set_rel_32
#define	atomic_clear_rel_int	atomic_clear_rel_32
#define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
#define	atomic_fcmpset_rel_int	atomic_fcmpset_rel_32
297
298static __inline void
299atomic_add_64(volatile uint64_t *p, uint64_t val)
300{
301
302	__asm __volatile("amoadd.d zero, %1, %0"
303			: "+A" (*p)
304			: "r" (val)
305			: "memory");
306}
307
308static __inline void
309atomic_subtract_64(volatile uint64_t *p, uint64_t val)
310{
311
312	__asm __volatile("amoadd.d zero, %1, %0"
313			: "+A" (*p)
314			: "r" (-val)
315			: "memory");
316}
317
318static __inline void
319atomic_set_64(volatile uint64_t *p, uint64_t val)
320{
321
322	__asm __volatile("amoor.d zero, %1, %0"
323			: "+A" (*p)
324			: "r" (val)
325			: "memory");
326}
327
328static __inline void
329atomic_clear_64(volatile uint64_t *p, uint64_t val)
330{
331
332	__asm __volatile("amoand.d zero, %1, %0"
333			: "+A" (*p)
334			: "r" (~val)
335			: "memory");
336}
337
338static __inline int
339atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
340{
341	uint64_t tmp;
342	int res;
343
344	res = 0;
345
346	__asm __volatile(
347		"0:"
348			"li   %1, 1\n" /* Preset to fail */
349			"lr.d %0, %2\n"
350			"bne  %0, %z3, 1f\n"
351			"sc.d %1, %z4, %2\n"
352			"bnez %1, 0b\n"
353		"1:"
354			: "=&r" (tmp), "=&r" (res), "+A" (*p)
355			: "rJ" (cmpval), "rJ" (newval)
356			: "memory");
357
358	return (!res);
359}
360
361static __inline int
362atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
363{
364	uint64_t tmp;
365	int res;
366
367	res = 0;
368
369	__asm __volatile(
370		"0:"
371			"li   %1, 1\n"		/* Preset to fail */
372			"lr.d %0, %2\n"		/* Load old value */
373			"bne  %0, %z4, 1f\n"	/* Compare */
374			"sc.d %1, %z5, %2\n"	/* Try to store new value */
375			"j 2f\n"
376		"1:"
377			"sd   %0, %3\n"		/* Save old value */
378		"2:"
379			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
380			: "rJ" (*cmpval), "rJ" (newval)
381			: "memory");
382
383	return (!res);
384}
385
386static __inline uint64_t
387atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
388{
389	uint64_t ret;
390
391	__asm __volatile("amoadd.d %0, %2, %1"
392			: "=&r" (ret), "+A" (*p)
393			: "r" (val)
394			: "memory");
395
396	return (ret);
397}
398
399static __inline uint64_t
400atomic_readandclear_64(volatile uint64_t *p)
401{
402	uint64_t ret;
403	uint64_t val;
404
405	val = 0;
406
407	__asm __volatile("amoswap.d %0, %2, %1"
408			: "=&r"(ret), "+A" (*p)
409			: "r" (val)
410			: "memory");
411
412	return (ret);
413}
414
415static __inline uint32_t
416atomic_swap_32(volatile uint32_t *p, uint32_t val)
417{
418	uint32_t old;
419
420	__asm __volatile("amoswap.w %0, %2, %1"
421			: "=&r"(old), "+A" (*p)
422			: "r" (val)
423			: "memory");
424
425	return (old);
426}
427
428static __inline uint64_t
429atomic_swap_64(volatile uint64_t *p, uint64_t val)
430{
431	uint64_t old;
432
433	__asm __volatile("amoswap.d %0, %2, %1"
434			: "=&r"(old), "+A" (*p)
435			: "r" (val)
436			: "memory");
437
438	return (old);
439}
440
/* int swap uses the 32-bit primitive. */
#define	atomic_swap_int			atomic_swap_32

/*
 * The long and pointer spellings are both aliases for the 64-bit
 * primitives.
 */
#define	atomic_add_long			atomic_add_64
#define	atomic_add_ptr			atomic_add_64
#define	atomic_subtract_long		atomic_subtract_64
#define	atomic_subtract_ptr		atomic_subtract_64
#define	atomic_set_long			atomic_set_64
#define	atomic_set_ptr			atomic_set_64
#define	atomic_clear_long		atomic_clear_64
#define	atomic_clear_ptr		atomic_clear_64
#define	atomic_cmpset_long		atomic_cmpset_64
#define	atomic_cmpset_ptr		atomic_cmpset_64
#define	atomic_fcmpset_long		atomic_fcmpset_64
#define	atomic_fcmpset_ptr		atomic_fcmpset_64
#define	atomic_fetchadd_long		atomic_fetchadd_64
#define	atomic_fetchadd_ptr		atomic_fetchadd_64
#define	atomic_readandclear_long	atomic_readandclear_64
#define	atomic_readandclear_ptr		atomic_readandclear_64
#define	atomic_swap_long		atomic_swap_64
#define	atomic_swap_ptr			atomic_swap_64
462
463ATOMIC_ACQ_REL(set, 64)
464ATOMIC_ACQ_REL(clear, 64)
465ATOMIC_ACQ_REL(add, 64)
466ATOMIC_ACQ_REL(subtract, 64)
467
468ATOMIC_CMPSET_ACQ_REL(64);
469ATOMIC_FCMPSET_ACQ_REL(64);
470
/*
 * Load the 64-bit word at p with acquire ordering: a plain volatile load
 * followed by fence().  Returns the loaded value.
 */
static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t loaded = *p;

	fence();
	return (loaded);
}
482
/*
 * Store val to the 64-bit word at p with release ordering: fence()
 * followed by a plain volatile store.
 */
static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{
	fence();
	*p = val;
}
491
/*
 * Acquire flavours of the long and pointer operations; both spellings map
 * onto the 64-bit versions.
 */
#define	atomic_load_acq_long		atomic_load_acq_64
#define	atomic_load_acq_ptr		atomic_load_acq_64
#define	atomic_add_acq_long		atomic_add_acq_64
#define	atomic_add_acq_ptr		atomic_add_acq_64
#define	atomic_subtract_acq_long	atomic_subtract_acq_64
#define	atomic_subtract_acq_ptr		atomic_subtract_acq_64
#define	atomic_set_acq_long		atomic_set_acq_64
#define	atomic_set_acq_ptr		atomic_set_acq_64
#define	atomic_clear_acq_long		atomic_clear_acq_64
#define	atomic_clear_acq_ptr		atomic_clear_acq_64
#define	atomic_cmpset_acq_long		atomic_cmpset_acq_64
#define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_long		atomic_fcmpset_acq_64
#define	atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_64

/* The generator macro is not used below this point in this header. */
#undef	ATOMIC_ACQ_REL
509
/* Acquire thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_acq(void)
{
	fence();
}
516
/* Release thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_rel(void)
{
	fence();
}
523
/* Acquire-release thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	fence();
}
530
/* Sequentially-consistent thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	fence();
}
537
/*
 * Release flavours of the long operations map onto the 64-bit versions.
 * Each alias is defined exactly once.
 */
#define	atomic_add_rel_long		atomic_add_rel_64
#define	atomic_clear_rel_long		atomic_clear_rel_64
#define	atomic_cmpset_rel_long		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_long		atomic_fcmpset_rel_64
#define	atomic_set_rel_long		atomic_set_rel_64
#define	atomic_subtract_rel_long	atomic_subtract_rel_64
#define	atomic_store_rel_long		atomic_store_rel_64

/* Release flavours of the pointer operations map onto the 64-bit versions. */
#define	atomic_add_rel_ptr		atomic_add_rel_64
#define	atomic_clear_rel_ptr		atomic_clear_rel_64
#define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_64
#define	atomic_set_rel_ptr		atomic_set_rel_64
#define	atomic_subtract_rel_ptr		atomic_subtract_rel_64
#define	atomic_store_rel_ptr		atomic_store_rel_64
556
557#include <sys/_atomic_subword.h>
558
559#endif /* _MACHINE_ATOMIC_H_ */
560