atomic.h revision 279543
/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/arm/include/atomic.h 279543 2015-03-02 20:40:25Z ian $
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/types.h>
#include <machine/armreg.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()
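/*
 * Illustrative use of the barrier macros above (not part of the original
 * header; the names 'data' and 'flag' are hypothetical).  A writer would
 * typically publish data with wmb() between the payload store and the flag
 * store, and a reader would pair that with rmb() after observing the flag:
 *
 *	data = compute();  wmb();  flag = 1;
 *	while (flag == 0)  ;       rmb();  use(data);
 *
 * On this port mb(), wmb() and rmb() all expand to the same dmb(), so the
 * different names mainly document intent.
 */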



/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
	defined (__ARM_ARCH_7A__)  || \
	defined (__ARM_ARCH_6__)   || \
	defined (__ARM_ARCH_6J__)  || \
	defined (__ARM_ARCH_6K__)  || \
	defined (__ARM_ARCH_6T2__) || \
	defined (__ARM_ARCH_6Z__)  || \
	defined (__ARM_ARCH_6ZK__)
#define	ARM_HAVE_ATOMIC64

static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
	__asm __volatile("dmb" : : : "memory");
#else
	__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}

#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline  void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}
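/*
 * Explanatory note (not in the original header): each ATOMIC_ACQ_REL(NAME,
 * WIDTH) invocation further down generates an _acq_ variant that performs
 * the plain operation and then issues a dmb, and a _rel_ variant that
 * issues the dmb first.  For example, ATOMIC_ACQ_REL(add, 32) expands to
 * roughly:
 *
 *	static __inline void
 *	atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		atomic_add_32(p, v);
 *		__do_dmb();
 *	}
 *
 *	static __inline void
 *	atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		__do_dmb();
 *		atomic_add_32(p, v);
 *	}
 */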

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");

}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   orr      %Q[tmp], %Q[val]\n"
		"   orr      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");

}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   bic      %Q[tmp], %Q[val]\n"
		"   bic      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	uint32_t ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
	                 "cmp %0, %2\n"
	                 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
	                 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}
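/*
 * Illustrative caller (not in the original header): atomic_cmpset_32()
 * returns non-zero if *p equalled cmpval and was replaced by newval, and
 * zero otherwise.  The names 'counter' and 'old' below are hypothetical.
 * A typical retry loop reloads the old value and tries again until the
 * compare-and-set succeeds:
 *
 *	uint32_t old;
 *	do {
 *		old = counter;
 *	} while (atomic_cmpset_32(&counter, old, old + 1) == 0);
 */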

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   teq      %Q[tmp], %Q[cmpval]\n"
		"   itee eq  \n"
		"   teqeq    %R[tmp], %R[cmpval]\n"
		"   movne    %[ret], #0\n"
		"   bne      2f\n"
		"   strexd   %[ret], %[newval], [%[ptr]]\n"
		"   teq      %[ret], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		"   mov      %[ret], #1\n"
		"2:          \n"
		:   [ret]    "=&r"  (ret),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [cmpval] "r"    (cmpval),
		    [newval] "r"    (newval)
		:   "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
	                 "cmp %0, %2\n"
	                 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
	                 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret = atomic_cmpset_long(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

	__do_dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

	__do_dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

	__do_dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}


static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[val]\n"
		"   adc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   subs     %Q[tmp], %Q[val]\n"
		"   sbc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}
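/*
 * Illustrative caller (not in the original header): atomic_fetchadd_32()
 * returns the value *p held before the addition, so it can hand out unique
 * tickets or sequence numbers.  The name 'next_id' is hypothetical:
 *
 *	uint32_t id = atomic_fetchadd_32(&next_id, 1);
 */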

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
	                 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 ,"+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	__do_dmb();
	*p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[ret], %Q[val]\n"
		"   adc      %R[tmp], %R[ret], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   mov      %Q[tmp], #0\n"
		"   mov      %R[tmp], #0\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD which puts the
	 * exclusive monitor into the exclusive state, so reset it to open state
	 * with CLREX because we don't actually need to store anything.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   clrex    \n"
		:   [ret]    "=&r"  (ret)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	__do_dmb();
	return (ret);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   strexd   %[exf], %[val], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [tmp]    "=&r"  (tmp),
		    [exf]    "=&r"  (exflag)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__do_dmb();
	atomic_store_64(p, val);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
	u_long ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
	                 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 ,"+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	__do_dmb();
	*p = v;
}
#else /* < armv6 */

#define __with_interrupts_disabled(expr) \
	do {						\
		u_int cpsr_save, tmp;			\
							\
		__asm __volatile(			\
			"mrs  %0, cpsr;"		\
			"orr  %1, %0, %2;"		\
			"msr  cpsr_fsxc, %1;"		\
			: "=r" (cpsr_save), "=r" (tmp)	\
			: "I" (PSR_I | PSR_F)		\
		        : "cc" );		\
		(expr);				\
		 __asm __volatile(		\
			"msr  cpsr_fsxc, %0"	\
			: /* no output */	\
			: "r" (cpsr_save)	\
			: "cc" );		\
	} while(0)
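/*
 * Explanatory note (not in the original header): the pre-ARMv6 kernel
 * implementations below get their atomicity by briefly masking IRQ and FIQ
 * (PSR_I | PSR_F) around a plain read-modify-write, which is assumed to be
 * sufficient because these CPUs are only supported as uniprocessors.  A
 * hypothetical caller of the macro above would look like:
 *
 *	__with_interrupts_disabled(*p += v);
 */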

static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
	__asm __volatile("swp	%0, %2, [%3]"
	    : "=&r" (val), "=m" (*ptr)
	    : "r" (val), "r" (ptr), "m" (*ptr)
	    : "memory");
	return (val);
}


#ifdef _KERNEL
#define	ARM_HAVE_ATOMIC64

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_set_64(volatile uint64_t *address, uint64_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline void
atomic_clear_64(volatile uint64_t *address, uint64_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	int ret;

	__with_interrupts_disabled(
	 {
	    	if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline u_int64_t
atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval)
{
	int ret;

	__with_interrupts_disabled(
	 {
	    	if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_add_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline void
atomic_subtract_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t value;

	__with_interrupts_disabled(
	{
	    	value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t value;

	__with_interrupts_disabled(
	{
	    	value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t value;

	__with_interrupts_disabled(value = *p);
	return (value);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t value)
{
	__with_interrupts_disabled(*p = value);
}

#else /* !_KERNEL */

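/*
 * Explanatory note (not in the original header): without LDREX/STREX or the
 * privilege to mask interrupts, the userland routines below build their
 * atomic read-modify-write operations as restartable atomic sequences.
 * Each sequence registers its start and end addresses at ARM_RAS_START
 * before doing a plain load/modify/store, and clears the registration
 * afterwards (the #0 / #0xffffffff stores); if the thread is preempted
 * while the registration is active, the kernel restarts it at the
 * beginning of the sequence, so the update is never observed half-done.
 */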
static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	register int done, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "cmp	%1, %3\n"
	    "streq	%4, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    "moveq	%1, #1\n"
	    "movne	%1, #0\n"
	    : "+r" (ras_start), "=r" (done)
	    ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
	return (done);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "add	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "sub	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"

	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "orr	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"

	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
	    : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "bic	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
	    : : "memory");

}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t start, tmp, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%3]\n"
	    "mov	%2, %1\n"
	    "add	%2, %2, %4\n"
	    "str	%2, [%3]\n"
	    "2:\n"
	    "mov	%2, #0\n"
	    "str	%2, [%0]\n"
	    "mov	%2, #0xffffffff\n"
	    "str	%2, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
	    : : "memory");
	return (start);
}

#endif /* _KERNEL */


static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

	return (__swp(0, p));
}

#define atomic_cmpset_rel_32	atomic_cmpset_32
#define atomic_cmpset_acq_32	atomic_cmpset_32
#define atomic_set_rel_32	atomic_set_32
#define atomic_set_acq_32	atomic_set_32
#define atomic_clear_rel_32	atomic_clear_32
#define atomic_clear_acq_32	atomic_clear_32
#define atomic_add_rel_32	atomic_add_32
#define atomic_add_acq_32	atomic_add_32
#define atomic_subtract_rel_32	atomic_subtract_32
#define atomic_subtract_acq_32	atomic_subtract_32
#define atomic_store_rel_32	atomic_store_32
#define atomic_store_rel_long	atomic_store_long
#define atomic_load_acq_32	atomic_load_32
#define atomic_load_acq_long	atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
#define atomic_load_acq_long		atomic_load_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

	atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

	atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

	return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline void
atomic_readandclear_long(volatile u_long *p)
{

	atomic_readandclear_32((volatile uint32_t *)p);
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

	atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

	atomic_subtract_32((volatile uint32_t *)p, v);
}



#endif /* Arch >= v6 */

static __inline int
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
	*dst = src;
}

static __inline int
atomic_load_long(volatile u_long *v)
{

	return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
	*dst = src;
}

#define atomic_clear_ptr		atomic_clear_32
#define atomic_set_ptr			atomic_set_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32

#endif /* _MACHINE_ATOMIC_H_ */