/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/arm/include/atomic.h 269405 2014-08-01 22:56:41Z ian $
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/types.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()
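
/*
 * The machine-independent mb()/wmb()/rmb() barriers all map to the
 * strongest primitive available here: a full DMB on ARMv6/v7 (via its
 * CP15 encoding on ARMv6), with the drain-write-buffer operation
 * standing in on earlier cores.
 */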

#ifndef I32_bit
#define I32_bit (1 << 7)        /* IRQ disable */
#endif
#ifndef F32_bit
#define F32_bit (1 << 6)        /* FIQ disable */
#endif

/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
	defined (__ARM_ARCH_7A__)  || \
	defined (__ARM_ARCH_6__)   || \
	defined (__ARM_ARCH_6J__)  || \
	defined (__ARM_ARCH_6K__)  || \
	defined (__ARM_ARCH_6T2__) || \
	defined (__ARM_ARCH_6Z__)  || \
	defined (__ARM_ARCH_6ZK__)
static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
	__asm __volatile("dmb" : : : "memory");
#else
	__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}

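/*
 * These macros generate the _acq (acquire) and _rel (release) variants
 * of an existing atomic op by issuing a full barrier after or before
 * the plain operation, respectively.
 */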
#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}

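/*
 * The ARMv6/v7 routines below all share one pattern: LDREX loads the
 * old value and marks the address for exclusive access, the new value
 * is computed, and STREX stores it only if the exclusive reservation
 * still holds, writing 0 into the status register on success; a
 * nonzero status sends the loop back to the LDREX to retry.
 */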
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "orr %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   orr      %Q[tmp], %Q[val]\n"
		"   orr      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "orr %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "bic %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   bic      %Q[tmp], %Q[val]\n"
		"   bic      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "bic %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	uint32_t ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne 1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}
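
/*
 * Illustrative sketch only (not part of this header): the typical
 * caller-side retry loop around atomic_cmpset_32, where 'compute'
 * stands in for any function of the old value:
 *
 *	uint32_t old, new;
 *	do {
 *		old = *p;
 *		new = compute(old);
 *	} while (atomic_cmpset_32(p, old, new) == 0);
 */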

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   teq      %Q[tmp], %Q[cmp]\n"
		"   itee eq  \n"
		"   teqeq    %R[tmp], %R[cmp]\n"
		"   movne    %[ret], #0\n"
		"   bne      2f\n"
		"   strexd   %[ret], %[new], [%[ptr]]\n"
		"   teq      %[ret], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		"   mov      %[ret], #1\n"
		"2:          \n"
		:   [ret]    "=&r"  (ret),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [cmp]    "r"    (cmpval),
		    [new]    "r"    (newval)
		:   "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne 1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret = atomic_cmpset_long(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

	__do_dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

	__do_dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

	__do_dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "add %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[val]\n"
		"   adc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "add %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "sub %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   subs     %Q[tmp], %Q[val]\n"
		"   sbc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "sub %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

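/*
 * The fetchadd routines return the value the target held before the
 * addition; the 64-bit and long variants below follow suit.
 */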
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "add %1, %0, %4\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	__do_dmb();
	*p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[ret], %Q[val]\n"
		"   adc      %R[tmp], %R[ret], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   mov      %Q[tmp], #0\n"
		"   mov      %R[tmp], #0\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it with
	 * CLREX because we don't actually need to store anything.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   clrex    \n"
		:   [ret]    "=&r"  (ret)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	__do_dmb();
	return (ret);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   strexd   %[exf], %[val], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [tmp]    "=&r"  (tmp),
		    [exf]    "=&r"  (exflag)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__do_dmb();
	atomic_store_64(p, val);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "add %1, %0, %4\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
	u_long ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	__do_dmb();
	*p = v;
}
#else /* < armv6 */

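/*
 * Pre-ARMv6 CPUs lack load/store-exclusive instructions.  In the
 * kernel, atomics are implemented by disabling both IRQs and FIQs
 * around a plain read-modify-write; userland instead uses restartable
 * atomic sequences (see below).
 */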
#define __with_interrupts_disabled(expr) \
	do {						\
		u_int cpsr_save, tmp;			\
							\
		__asm __volatile(			\
			"mrs  %0, cpsr;"		\
			"orr  %1, %0, %2;"		\
			"msr  cpsr_fsxc, %1;"		\
			: "=r" (cpsr_save), "=r" (tmp)	\
			: "I" (I32_bit | F32_bit)	\
			: "cc" );			\
		(expr);					\
		__asm __volatile(			\
			"msr  cpsr_fsxc, %0"		\
			: /* no output */		\
			: "r" (cpsr_save)		\
			: "cc" );			\
	} while(0)

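/*
 * SWP atomically exchanges a register with a memory word in a single
 * bus operation; it is the only atomic primitive on these older CPUs
 * and was deprecated starting with ARMv6, which is why it is confined
 * to this side of the #if.
 */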
static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
	__asm __volatile("swp	%0, %2, [%3]"
	    : "=&r" (val), "=m" (*ptr)
	    : "r" (val), "r" (ptr), "m" (*ptr)
	    : "memory");
	return (val);
}

#ifdef _KERNEL
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_set_64(volatile uint64_t *address, uint64_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline void
atomic_clear_64(volatile uint64_t *address, uint64_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline u_int64_t
atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_add_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline void
atomic_subtract_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t value;

	__with_interrupts_disabled(value = *p);
	return (value);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t value)
{
	__with_interrupts_disabled(*p = value);
}

#else /* !_KERNEL */

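/*
 * Userland on pre-v6 ARM uses restartable atomic sequences (RAS): the
 * sequence's start and end addresses are published at ARM_RAS_START
 * and ARM_RAS_START + 4, and (as an assumption about the matching
 * kernel support, not something visible in this file) a thread
 * preempted with its PC inside that window is restarted at the
 * recorded start address, so the sequence executes atomically with
 * respect to preemption.
 */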
static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	register int done, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "cmp	%1, %3\n"
	    "streq	%4, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    "moveq	%1, #1\n"
	    "movne	%1, #0\n"
	    : "+r" (ras_start), "=r" (done)
	    , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
	return (done);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "add	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "sub	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "orr	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
	    : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "bic	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
	    : : "memory");
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t start, tmp, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%3]\n"
	    "mov	%2, %1\n"
	    "add	%2, %2, %4\n"
	    "str	%2, [%3]\n"
	    "2:\n"
	    "mov	%2, #0\n"
	    "str	%2, [%0]\n"
	    "mov	%2, #0xffffffff\n"
	    "str	%2, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
	    : : "memory");
	return (start);
}

#endif /* _KERNEL */

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

	return (__swp(0, p));
}

#define atomic_cmpset_rel_32	atomic_cmpset_32
#define atomic_cmpset_acq_32	atomic_cmpset_32
#define atomic_set_rel_32	atomic_set_32
#define atomic_set_acq_32	atomic_set_32
#define atomic_clear_rel_32	atomic_clear_32
#define atomic_clear_acq_32	atomic_clear_32
#define atomic_add_rel_32	atomic_add_32
#define atomic_add_acq_32	atomic_add_32
#define atomic_subtract_rel_32	atomic_subtract_32
#define atomic_subtract_acq_32	atomic_subtract_32
#define atomic_store_rel_32	atomic_store_32
#define atomic_store_rel_long	atomic_store_long
#define atomic_load_acq_32	atomic_load_32
#define atomic_load_acq_long	atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

	atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

	atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

	return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline void
atomic_readandclear_long(volatile u_long *p)
{

	atomic_readandclear_32((volatile uint32_t *)p);
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

	atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

	atomic_subtract_32((volatile uint32_t *)p, v);
}

#endif /* Arch >= v6 */

static __inline int
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
	*dst = src;
}

static __inline int
atomic_load_long(volatile u_long *v)
{

	return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
	*dst = src;
}

#define atomic_clear_ptr		atomic_clear_32
#define atomic_set_ptr			atomic_set_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32

#endif /* _MACHINE_ATOMIC_H_ */