/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/arm/include/atomic.h 269403 2014-08-01 22:28:36Z ian $
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/types.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()
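
/*
 * Example (illustrative sketch only): the barrier macros above are meant
 * to be paired around ordinary loads and stores, e.g. a producer/consumer
 * handoff between CPUs:
 *
 *	data = compute();	// producer
 *	wmb();			// order the data store before the flag store
 *	flag = 1;
 *
 *	while (flag == 0)	// consumer
 *		continue;
 *	rmb();			// order the flag load before the data load
 *	consume(data);
 *
 * compute() and consume() are hypothetical placeholders, not part of
 * this header.
 */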

#ifndef I32_bit
#define I32_bit (1 << 7)        /* IRQ disable */
#endif
#ifndef F32_bit
#define F32_bit (1 << 6)        /* FIQ disable */
#endif

/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
	defined (__ARM_ARCH_7A__)  || \
	defined (__ARM_ARCH_6__)   || \
	defined (__ARM_ARCH_6J__)  || \
	defined (__ARM_ARCH_6K__)  || \
	defined (__ARM_ARCH_6T2__) || \
	defined (__ARM_ARCH_6Z__)  || \
	defined (__ARM_ARCH_6ZK__)
static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
	__asm __volatile("dmb" : : : "memory");
#else
	__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}

#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}
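
/*
 * For instance, ATOMIC_ACQ_REL(add, 32) expands to atomic_add_acq_32()
 * and atomic_add_rel_32(): the acquire variant performs the operation
 * and then issues dmb, so later memory accesses cannot be hoisted above
 * it; the release variant issues dmb first, so earlier accesses are
 * visible before the operation itself.
 */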

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");
}
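
/*
 * The ldrex/strex pairs used throughout this section implement the usual
 * load-linked/store-conditional retry loop; in C-like pseudocode (a
 * sketch, not compilable code, with hypothetical helpers):
 *
 *	do {
 *		old = load_exclusive(address);			// ldrex
 *	} while (store_exclusive(address, old | setmask));	// strex
 *
 * strex writes 0 on success and 1 if exclusivity was lost, in which
 * case the loop retries.
 */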

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   orr      %Q[tmp], %Q[val]\n"
		"   orr      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   bic      %Q[tmp], %Q[val]\n"
		"   bic      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	uint32_t ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   teq      %Q[tmp], %Q[cmp]\n"
		"   itee eq  \n"
		"   teqeq    %R[tmp], %R[cmp]\n"
		"   movne    %[ret], #0\n"
		"   bne      2f\n"
		"   strexd   %[ret], %[new], [%[ptr]]\n"
		"   teq      %[ret], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		"   mov      %[ret], #1\n"
		"2:          \n"
		:   [ret]    "=&r"  (ret),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [cmp]    "r"    (cmpval),
		    [new]    "r"    (newval)
		:   "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

	__do_dmb();
	return (ret);
}
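
/*
 * Example (illustrative only): atomic_cmpset_acq_32() returns nonzero
 * when the compare-and-set succeeded, which is enough to build a simple
 * spinlock acquire around a hypothetical lock word:
 *
 *	while (atomic_cmpset_acq_32(&lock, 0, 1) == 0)
 *		continue;	// spin until we win the 0 -> 1 transition
 */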

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret = atomic_cmpset_long(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

	__do_dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

	__do_dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

	__do_dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[val]\n"
		"   adc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   subs     %Q[tmp], %Q[val]\n"
		"   sbc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    , "+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}
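
/*
 * Example (illustrative only): atomic_fetchadd_32() returns the value the
 * word held before the addition, so unique ticket numbers can be handed
 * out with:
 *
 *	my_ticket = atomic_fetchadd_32(&next_ticket, 1);
 */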

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	__do_dmb();
	*p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[ret], %Q[val]\n"
		"   adc      %R[tmp], %R[ret], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   mov      %Q[tmp], #0\n"
		"   mov      %R[tmp], #0\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it to the
	 * open state with CLREX because we don't actually need to store
	 * anything.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   clrex    \n"
		:   [ret]    "=&r"  (ret)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	__do_dmb();
	return (ret);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   strexd   %[exf], %[val], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [tmp]    "=&r"  (tmp),
		    [exf]    "=&r"  (exflag)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__do_dmb();
	atomic_store_64(p, val);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
			    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
	u_long ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	__do_dmb();
	*p = v;
}
#else /* < armv6 */

#define __with_interrupts_disabled(expr)		\
	do {						\
		u_int cpsr_save, tmp;			\
							\
		__asm __volatile(			\
			"mrs  %0, cpsr;"		\
			"orr  %1, %0, %2;"		\
			"msr  cpsr_fsxc, %1;"		\
			: "=r" (cpsr_save), "=r" (tmp)	\
			: "I" (I32_bit | F32_bit)	\
			: "cc" );			\
		(expr);					\
		__asm __volatile(			\
			"msr  cpsr_fsxc, %0"		\
			: /* no output */		\
			: "r" (cpsr_save)		\
			: "cc" );			\
	} while(0)

static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
	__asm __volatile("swp	%0, %2, [%3]"
	    : "=&r" (val), "=m" (*ptr)
	    : "r" (val), "r" (ptr), "m" (*ptr)
	    : "memory");
	return (val);
}
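
/*
 * Note: the SWP instruction used above was deprecated starting with
 * ARMv6 in favor of ldrex/strex, which is why this implementation is
 * only used on pre-v6 parts.
 */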

#ifdef _KERNEL
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

#else /* !_KERNEL */

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	register int done, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "cmp	%1, %3\n"
	    "streq	%4, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    "moveq	%1, #1\n"
	    "movne	%1, #0\n"
	    : "+r" (ras_start), "=r" (done)
	    , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
	return (done);
}
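
/*
 * The stores to [ARM_RAS_START] and [ARM_RAS_START + 4] above and below
 * bracket a restartable atomic sequence (RAS): the start and end
 * addresses of the sequence are published where the kernel can see them,
 * and if the thread is preempted inside that window the kernel restarts
 * it at the recorded start address, making the load/modify/store appear
 * atomic on uniprocessor systems without exclusive instructions.
 */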

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "add	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "sub	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "orr	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
	    : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "bic	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
	    : : "memory");
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t start, tmp, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%3]\n"
	    "mov	%2, %1\n"
	    "add	%2, %2, %4\n"
	    "str	%2, [%3]\n"
	    "2:\n"
	    "mov	%2, #0\n"
	    "str	%2, [%0]\n"
	    "mov	%2, #0xffffffff\n"
	    "str	%2, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
	    : : "memory");
	return (start);
}

#endif /* _KERNEL */

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

	return (__swp(0, p));
}

#define atomic_cmpset_rel_32	atomic_cmpset_32
#define atomic_cmpset_acq_32	atomic_cmpset_32
#define atomic_set_rel_32	atomic_set_32
#define atomic_set_acq_32	atomic_set_32
#define atomic_clear_rel_32	atomic_clear_32
#define atomic_clear_acq_32	atomic_clear_32
#define atomic_add_rel_32	atomic_add_32
#define atomic_add_acq_32	atomic_add_32
#define atomic_subtract_rel_32	atomic_subtract_32
#define atomic_subtract_acq_32	atomic_subtract_32
#define atomic_store_rel_32	atomic_store_32
#define atomic_store_rel_long	atomic_store_long
#define atomic_load_acq_32	atomic_load_32
#define atomic_load_acq_long	atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

	atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

	atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

	return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

	atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

	atomic_subtract_32((volatile uint32_t *)p, v);
}

#endif /* Arch >= v6 */

static __inline uint32_t
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
	*dst = src;
}

static __inline u_long
atomic_load_long(volatile u_long *v)
{

	return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
	*dst = src;
}

#define atomic_clear_ptr		atomic_clear_32
#define atomic_set_ptr			atomic_set_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32

#endif /* _MACHINE_ATOMIC_H_ */