/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

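/*
 * Memory barrier primitives.  ARMv7 and later provide the ISB, DSB and DMB
 * instructions directly; on older ARM the equivalent CP15 c7 barrier
 * operations are used instead.
 */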
#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

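/*
 * 64-bit atomic operations are available on this 32-bit port; they are
 * implemented below with the LDREXD/STREXD exclusive-access instructions.
 */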
#define	ARM_HAVE_ATOMIC64

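/*
 * The ATOMIC_ACQ_REL* macros generate the _acq_ and _rel_ variants of an
 * operation by issuing a dmb() after (acquire) or before (release) the
 * plain operation defined further below.
 */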
#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	dmb();								\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	dmb();								\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline  void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	dmb();								\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	dmb();								\
	atomic_##NAME##_##WIDTH(p, v);					\
}

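/*
 * The read-modify-write primitives below all follow the same pattern: a
 * LDREX/STREX (or LDREXD/STREXD for the 64-bit variants) loop that retries
 * until the exclusive store succeeds.
 */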
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   add	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[val]			\n"
	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

	atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

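/*
 * atomic_clear_* atomically clears the bits given in the mask (BIC).
 */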
static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   bic	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   bic	%Q[tmp], %Q[val]			\n"
	    "   bic	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

	atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

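/*
 * fcmpset: compare *_ptr against *_old and, if they match, store _new.
 * Returns non-zero on success; on failure the value actually observed is
 * written back to *_old so the caller can retry without an extra reload.
 */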
#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    {                                                         \
	TYPE tmp;                                             \
                                                              \
	__asm __volatile(                                     \
	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
	    "   teq            %[tmp], %[ret]            \n"  \
	    "   ittee          ne                        \n"  \
	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
	    "   movne          %[ret], #0                \n"  \
	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
	    "   eorseq         %[ret], #1                \n"  \
	    "   beq            1b                        \n"  \
	    : [ret] "=&r" (RET),                              \
	      [tmp] "=&r" (tmp)                               \
	    : [ptr] "r"   (_ptr),                             \
	      [oldv] "r"  (_old),                             \
	      [newv] "r"  (_new)                              \
	    : "cc", "memory");                                \
    }

#define ATOMIC_FCMPSET_CODE64(RET)                                 \
    {                                                              \
	uint64_t cmp, tmp;                                         \
                                                                   \
	__asm __volatile(                                          \
	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
	    "   it       eq                                   \n"  \
	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
	    "   ittee    ne                                   \n"  \
	    "   movne    %[ret], #0                           \n"  \
	    "   strdne   %[cmp], [%[oldv]]                    \n"  \
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
	    "   eorseq   %[ret], #1                           \n"  \
	    "   beq      1b                                   \n"  \
	    : [ret] "=&r" (RET),                                   \
	      [cmp] "=&r" (cmp),                                   \
	      [tmp] "=&r" (tmp)                                    \
	    : [ptr] "r"   (_ptr),                                  \
	      [oldv] "r"  (_old),                                  \
	      [newv] "r"  (_new)                                   \
	    : "cc", "memory");                                     \
    }

static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}
#define	atomic_fcmpset_8	atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}
#define	atomic_fcmpset_16	atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

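/*
 * Typical fcmpset usage is a retry loop (illustrative sketch only; the
 * 'counter' variable is hypothetical caller code, not part of this API):
 *
 *	uint32_t old = *counter;
 *	while (atomic_fcmpset_32(counter, &old, old + 1) == 0)
 *		continue;
 *
 * Each failed attempt refreshes 'old' with the value that was observed, so
 * no explicit reload is needed before retrying.
 */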
#define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    {                                                        \
	__asm __volatile(                                    \
	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
	    "   teq            %[ret], %[oldv]           \n" \
	    "   itee           ne                        \n" \
	    "   movne          %[ret], #0                \n" \
	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
	    "   eorseq         %[ret], #1                \n" \
	    "   beq            1b                        \n" \
	    : [ret] "=&r" (RET)                              \
	    : [ptr] "r"   (_ptr),                            \
	      [oldv] "r"  (_old),                            \
	      [newv] "r"  (_new)                             \
	    : "cc", "memory");                               \
    }

#define ATOMIC_CMPSET_CODE64(RET)                                 \
    {                                                             \
	uint64_t tmp;                                             \
	                                                          \
	__asm __volatile(                                         \
	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
	    "   it       eq                                   \n" \
	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
	    "   itee     ne                                   \n" \
	    "   movne    %[ret], #0                           \n" \
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
	    "   eorseq   %[ret], #1                           \n" \
	    "   beq      1b                                   \n" \
	    : [ret] "=&r" (RET),                                  \
	      [tmp] "=&r" (tmp)                                   \
	    : [ptr] "r"   (_ptr),                                 \
	      [oldv] "r"  (_old),                                 \
	      [newv] "r"  (_new)                                  \
	    : "cc", "memory");                                    \
    }

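/*
 * cmpset: like fcmpset, but the expected value is passed by value and is not
 * updated on failure; the return value is non-zero only when the store
 * succeeded.
 */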
static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}
#define	atomic_cmpset_8		atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}
#define	atomic_cmpset_16	atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}

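/*
 * fetchadd: atomically add 'val' to the target and return the value it held
 * before the addition.
 */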
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   add	%1, %0, %4	\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}

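/*
 * Acquire loads are a plain load followed by a dmb(), so that later accesses
 * cannot be reordered before the load.
 */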
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it to the
	 * open state with CLREX because we don't actually need to store
	 * anything.
	 */
	__asm __volatile(
	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
	    "clrex					\n"
	    : [ret] "=&r" (ret)
	    : [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	dmb();
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	dmb();
	return (v);
}

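/*
 * readandclear: atomically replace the target with zero and return its
 * previous value.
 */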
static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   mov	%1, #0		\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   mov	%Q[tmp], #0				\n"
	    "   mov	%R[tmp], #0				\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

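/*
 * atomic_set_* atomically sets the bits given in the mask (ORR).
 */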
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   orr	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   orr	%Q[tmp], %Q[val]			\n"
	    "   orr	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

	atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   sub	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   subs	%Q[tmp], %Q[val]			\n"
	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

	atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [tmp] "=&r" (tmp),
	      [exf] "=&r" (exflag)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	dmb();
	*p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	dmb();
	atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	dmb();
	*p = v;
}

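/*
 * testandclear/testandset: atomically clear or set a single bit and return
 * its previous value (0 or 1).  The 32-bit variants use only the low five
 * bits of the bit number.
 */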
static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov     ip, #1					\n"
	    "   lsl     ip, ip, %[bit]				\n"
	    /*  Done with %[bit] as input, reuse below as output. */
	    "1:							\n"
	    "   ldrex	%[oldv], [%[ptr]]			\n"
	    "   bic     %[newv], %[oldv], ip			\n"
	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
	    "   teq	%[bit], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   ands	%[bit], %[oldv], ip			\n"
	    "   it	ne					\n"
	    "   movne   %[bit], #1                              \n"
	    : [bit]  "=&r"   (result),
	      [oldv] "=&r"   (oldv),
	      [newv] "=&r"   (newv)
	    : [ptr]  "r"     (ptr),
	             "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define	atomic_testandclear_long	atomic_testandclear_long

static __inline int
atomic_testandclear_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assumes little-endian byte order; atomic_testandclear_32() uses
	 * only the low 5 bits of v, so select the correct 32-bit word first.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandclear_32(p32, v));
}

static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov     ip, #1					\n"
	    "   lsl     ip, ip, %[bit]				\n"
	    /*  Done with %[bit] as input, reuse below as output. */
	    "1:							\n"
	    "   ldrex	%[oldv], [%[ptr]]			\n"
	    "   orr     %[newv], %[oldv], ip			\n"
	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
	    "   teq	%[bit], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   ands	%[bit], %[oldv], ip			\n"
	    "   it	ne					\n"
	    "   movne   %[bit], #1                              \n"
	    : [bit]  "=&r"   (result),
	      [oldv] "=&r"   (oldv),
	      [newv] "=&r"   (newv)
	    : [ptr]  "r"     (ptr),
	             "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define	atomic_testandset_long	atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assumes little-endian byte order; atomic_testandset_32() uses
	 * only the low 5 bits of v, so select the correct 32-bit word first.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandset_32(p32, v));
}

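/*
 * swap: atomically exchange the target with 'v' and return the previous
 * value.
 */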
static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t ret, exflag;

	__asm __volatile(
	    "1: ldrex	%[ret], [%[ptr]]		\n"
	    "   strex	%[exf], %[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    : [ret] "=&r"  (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r"  (v),
	      [ptr] "r"  (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_32((volatile uint32_t *)p, v));
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t ret;
	uint32_t exflag;

	__asm __volatile(
	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r"   (v),
	      [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

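/*
 * All of the thread fences are implemented as a full dmb(); no weaker
 * barrier is distinguished here.
 */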
static __inline void
atomic_thread_fence_acq(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb();
}

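/*
 * Pointers, ints and longs are all 32 bits wide on this architecture, so the
 * _ptr and _int operations map directly to their _32 counterparts.
 */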
#define atomic_add_ptr			atomic_add_32
#define atomic_add_acq_ptr		atomic_add_acq_32
#define atomic_add_rel_ptr		atomic_add_rel_32
#define atomic_subtract_ptr		atomic_subtract_32
#define atomic_subtract_acq_ptr		atomic_subtract_acq_32
#define atomic_subtract_rel_ptr		atomic_subtract_rel_32
#define atomic_clear_ptr		atomic_clear_32
#define atomic_clear_acq_ptr		atomic_clear_acq_32
#define atomic_clear_rel_ptr		atomic_clear_rel_32
#define atomic_set_ptr			atomic_set_32
#define atomic_set_acq_ptr		atomic_set_acq_32
#define atomic_set_rel_ptr		atomic_set_rel_32
#define atomic_fcmpset_ptr		atomic_fcmpset_32
#define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_fetchadd_ptr		atomic_fetchadd_32
#define atomic_readandclear_ptr		atomic_readandclear_32
#define atomic_load_acq_ptr		atomic_load_acq_32
#define atomic_store_rel_ptr		atomic_store_rel_32
#define atomic_swap_ptr			atomic_swap_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_fcmpset_int		atomic_fcmpset_32
#define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32
#define atomic_swap_int			atomic_swap_32

/*
 * For:
 *  - atomic_load_acq_8
 *  - atomic_load_acq_16
 *  - atomic_testandset_acq_long
 */
#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */