/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <stdint.h>
#include <sys/cdefs.h>
// __FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 255738 2013-09-20 20:44:32Z zbb $");

#define __SYNC_ATOMICS
#define __strong_reference(sym,aliassym)        \
	extern __typeof (sym) aliassym __attribute__ ((__alias__ (#sym)))

#include <sys/param.h>
#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
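
/*
 * Illustrative sketch only (not part of the build): callers below wrap
 * the critical statement in WITHOUT_INTERRUPTS(), e.g.
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 *
 * which expands to intr_disable(), the statement, intr_restore().
 * This is only safe on uniprocessor kernels, which is why the macro is
 * guarded by _KERNEL && !SMP.
 */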

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
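
/*
 * For reference: on a UP kernel do_sync() is only a compiler barrier,
 * on ARMv7 it issues DMB, and on ARMv6 it performs the equivalent
 * CP15 c7/c10/5 Data Memory Barrier operation. The __sync_*
 * implementations further down invoke it ahead of their LDREX/STREX
 * loops.
 */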

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */
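
/*
 * Sketch of how <stdatomic.h> reaches this API, assuming the compiler
 * does not expand the atomics inline for the target; never compiled.
 */
#if 0
#include <stdatomic.h>

static uint32_t
example_increment(_Atomic uint32_t *counter)
{

	/*
	 * On such targets this call may lower to
	 * __atomic_fetch_add_4(counter, 1, __ATOMIC_RELAXED),
	 * one of the functions emitted below.
	 */
	return (atomic_fetch_add_explicit(counter, 1, memory_order_relaxed));
}
#endif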

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
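
/*
 * Rough sketch of the RAS pattern used below, for reference only: the
 * two words at ARM_RAS_START hold the start and end address of the
 * sequence currently in progress. Each sequence registers itself there
 * before touching *mem and closes the window (start = 0,
 * end = 0xffffffff) once the store has been issued:
 *
 *	ras[0] = &start_of_sequence;  ras[1] = &end_of_sequence;
 *	old = *mem;  new value stored to *mem;
 *	ras[0] = 0;  ras[1] = 0xffffffff;
 *
 * If the thread is interrupted while the program counter is inside the
 * window, the kernel rewinds it to the start label, so the load/store
 * pair appears atomic on a uniprocessor.
 */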

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */
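
/*
 * These are the pre-C11 GCC builtins. A call such as
 *
 *	__sync_fetch_and_add(&x, 1);
 *
 * on a 32-bit object is lowered by the compiler to
 * __sync_fetch_and_add_4(&x, 1) when it cannot (or is asked not to)
 * expand the operation inline, which is what the definitions below
 * provide. (Sketch only; the exact lowering depends on the compiler
 * and target flags.)
 */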

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
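
/*
 * Worked example of the masking trick, for reference only (assuming a
 * little-endian ARM): for a uint8_t at address 0x1003, round_to_word()
 * yields 0x1000 and the byte lives at offset 3, so
 *
 *	val32   = (uint32_t)val << 24;		(built via put_1())
 *	negmask = ~((uint32_t)0xff << 24);	(0x00ffffff)
 *
 * and the LDREX/STREX loop computes (old & negmask) | val32, which
 * replaces only the byte of interest while leaving its neighbours in
 * the same 32-bit word untouched.
 */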

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */