/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/arm/include/atomic.h 271310 2014-09-09 13:50:21Z ian $
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/types.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif
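
/*
 * In userland, <machine/sysarch.h> supplies ARM_RAS_START for the pre-v6
 * restartable-atomic-sequence helpers below; in the kernel,
 * <machine/cpuconf.h> describes the CPU architecture level instead.
 */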

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()
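
/*
 * No finer-grained ordering is attempted: mb(), wmb() and rmb() all expand
 * to a full dmb().  On ARMv7 the barriers are dedicated instructions; on
 * ARMv6 they are the equivalent CP15 c7 operations, and on older CPUs
 * dmb() falls back to dsb() (the classic drain-write-buffer operation).
 */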

#ifndef I32_bit
#define I32_bit (1 << 7)        /* IRQ disable */
#endif
#ifndef F32_bit
#define F32_bit (1 << 6)        /* FIQ disable */
#endif

/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
	defined (__ARM_ARCH_7A__)  || \
	defined (__ARM_ARCH_6__)   || \
	defined (__ARM_ARCH_6J__)  || \
	defined (__ARM_ARCH_6K__)  || \
	defined (__ARM_ARCH_6T2__) || \
	defined (__ARM_ARCH_6Z__)  || \
	defined (__ARM_ARCH_6ZK__)
#define	ARM_HAVE_ATOMIC64

static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
	__asm __volatile("dmb" : : : "memory");
#else
	__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}
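
/*
 * __do_dmb() issues a full data memory barrier.  The _acq_ and _rel_
 * variants below are built from it: an acquire is the operation followed
 * by a barrier, a release is a barrier followed by the operation.  A full
 * barrier is stronger than acquire/release strictly requires, but it is
 * the only ordering primitive available here.
 */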

#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}
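
/*
 * For example, ATOMIC_ACQ_REL(add, 32) expands to atomic_add_acq_32() and
 * atomic_add_rel_32(), both wrapping atomic_add_32() with the barrier on
 * the appropriate side; the macros are instantiated (and then #undef'd)
 * further below.
 */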

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");
}
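
/*
 * The pattern above is a standard load-exclusive/store-exclusive retry
 * loop: ldrex loads the word and arms the exclusive monitor, strex stores
 * only if the monitor is still exclusive and writes 0 into its status
 * register on success, and a nonzero status branches back to retry.  The
 * "it ne" predicates the bne when the file is assembled as Thumb-2.
 */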

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   orr      %Q[tmp], %Q[val]\n"
		"   orr      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}
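
/*
 * In the 64-bit routines the %Q and %R operand modifiers select the low
 * and high 32-bit halves of a 64-bit operand, so the two orr instructions
 * above operate on each half of the register pair that ldrexd/strexd
 * load and store atomically.
 */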

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (clearmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   bic      %Q[tmp], %Q[val]\n"
		"   bic      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long clearmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (clearmask) : : "cc", "memory");
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	uint32_t ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}
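
/*
 * atomic_cmpset_32() returns nonzero if the store succeeded (*p matched
 * cmpval and was replaced by newval) and 0 otherwise; the same register
 * that receives the strex status doubles as the return value.
 */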

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   teq      %Q[tmp], %Q[cmpval]\n"
		"   itee eq  \n"
		"   teqeq    %R[tmp], %R[cmpval]\n"
		"   movne    %[ret], #0\n"
		"   bne      2f\n"
		"   strexd   %[ret], %[newval], [%[ptr]]\n"
		"   teq      %[ret], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		"   mov      %[ret], #1\n"
		"2:          \n"
		:   [ret]    "=&r"  (ret),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [cmpval] "r"    (cmpval),
		    [newval] "r"    (newval)
		:   "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret = atomic_cmpset_long(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

	__do_dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

	__do_dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

	__do_dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[val]\n"
		"   adc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   subs     %Q[tmp], %Q[val]\n"
		"   sbc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	__do_dmb();
	*p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[ret], %Q[val]\n"
		"   adc      %R[tmp], %R[ret], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   mov      %Q[tmp], #0\n"
		"   mov      %R[tmp], #0\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD which puts the
	 * exclusive monitor into the exclusive state, so reset it to the open
	 * state with CLREX because we don't actually need to store anything.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   clrex    \n"
		:   [ret]    "=&r"  (ret)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	__do_dmb();
	return (ret);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   strexd   %[exf], %[val], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [tmp]    "=&r"  (tmp),
		    [exf]    "=&r"  (exflag)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__do_dmb();
	atomic_store_64(p, val);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
	u_long ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	__do_dmb();
	*p = v;
}
#else /* < armv6 */

#define __with_interrupts_disabled(expr) \
	do {						\
		u_int cpsr_save, tmp;			\
							\
		__asm __volatile(			\
			"mrs  %0, cpsr;"		\
			"orr  %1, %0, %2;"		\
			"msr  cpsr_fsxc, %1;"		\
			: "=r" (cpsr_save), "=r" (tmp)	\
			: "I" (I32_bit | F32_bit)	\
			: "cc" );			\
		(expr);					\
		__asm __volatile(			\
			"msr  cpsr_fsxc, %0"		\
			: /* no output */		\
			: "r" (cpsr_save)		\
			: "cc" );			\
	} while(0)
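
/*
 * On pre-v6 (uniprocessor) CPUs there are no load/store-exclusive
 * instructions, so kernel-mode atomicity is obtained by masking both IRQ
 * and FIQ around the expression.  This touches the CPSR and is therefore
 * usable only in privileged mode; userland gets the restartable-sequence
 * versions further below instead.
 */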

static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
	__asm __volatile("swp	%0, %2, [%3]"
	    : "=&r" (val), "=m" (*ptr)
	    : "r" (val), "r" (ptr), "m" (*ptr)
	    : "memory");
	return (val);
}
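
/*
 * SWP atomically exchanges a register with a memory word on these older
 * CPUs; the instruction is deprecated from ARMv6 on, which is why the
 * armv6+ code above never uses it.
 */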

#ifdef _KERNEL
#define	ARM_HAVE_ATOMIC64

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_set_64(volatile uint64_t *address, uint64_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline void
atomic_clear_64(volatile uint64_t *address, uint64_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline u_int64_t
atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_add_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline void
atomic_subtract_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t value;

	__with_interrupts_disabled(value = *p);
	return (value);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t value)
{
	__with_interrupts_disabled(*p = value);
}

#else /* !_KERNEL */

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	register int done, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "cmp	%1, %3\n"
	    "streq	%4, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    "moveq	%1, #1\n"
	    "movne	%1, #0\n"
	    : "+r" (ras_start), "=r" (done)
	    , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
	return (done);
}
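
/*
 * The sequence above is a restartable atomic sequence (RAS): the start
 * and end addresses of the load-compare-store window are published at
 * ARM_RAS_START and ARM_RAS_START + 4, and if the kernel preempts the
 * thread while its PC lies inside that window it rewinds the PC to the
 * published start, re-running the sequence from the top.  Storing 0 and
 * 0xffffffff afterwards closes the window again.
 */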

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "add	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "sub	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "orr	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
	    : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "bic	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
	    : : "memory");
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t start, tmp, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%3]\n"
	    "mov	%2, %1\n"
	    "add	%2, %2, %4\n"
	    "str	%2, [%3]\n"
	    "2:\n"
	    "mov	%2, #0\n"
	    "str	%2, [%0]\n"
	    "mov	%2, #0xffffffff\n"
	    "str	%2, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
	    : : "memory");
	return (start);
}

#endif /* _KERNEL */

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

	return (__swp(0, p));
}

#define atomic_cmpset_rel_32	atomic_cmpset_32
#define atomic_cmpset_acq_32	atomic_cmpset_32
#define atomic_set_rel_32	atomic_set_32
#define atomic_set_acq_32	atomic_set_32
#define atomic_clear_rel_32	atomic_clear_32
#define atomic_clear_acq_32	atomic_clear_32
#define atomic_add_rel_32	atomic_add_32
#define atomic_add_acq_32	atomic_add_32
#define atomic_subtract_rel_32	atomic_subtract_32
#define atomic_subtract_acq_32	atomic_subtract_32
#define atomic_store_rel_32	atomic_store_32
#define atomic_store_rel_long	atomic_store_long
#define atomic_load_acq_32	atomic_load_32
#define atomic_load_acq_long	atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

	atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

	atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

	return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline void
atomic_readandclear_long(volatile u_long *p)
{

	atomic_readandclear_32((volatile uint32_t *)p);
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

	atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

	atomic_subtract_32((volatile uint32_t *)p, v);
}

#endif /* Arch >= v6 */
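
/*
 * Aligned 32-bit (and u_long) loads and stores are single-copy atomic on
 * ARM, so the plain load/store routines below are ordinary accesses with
 * no barrier; only the _acq/_rel wrappers above add ordering.
 */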

static __inline int
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
	*dst = src;
}

static __inline int
atomic_load_long(volatile u_long *v)
{

	return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
	*dst = src;
}

#define atomic_clear_ptr		atomic_clear_32
#define atomic_set_ptr			atomic_set_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32
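
/*
 * Usage sketch (illustrative only, not part of the API): a minimal
 * spin-acquire built from the primitives above, assuming a word-sized
 * lock initialized to 0.
 *
 *	static volatile uint32_t lock;
 *
 *	while (atomic_cmpset_acq_32(&lock, 0, 1) == 0)
 *		;				// spin until 0 -> 1 wins
 *	// ... critical section ...
 *	atomic_store_rel_32(&lock, 0);		// barrier, then unlock
 */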

#endif /* _MACHINE_ATOMIC_H_ */