/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 255092 2013-08-31 08:50:45Z theraven $");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
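
/*
 * Usage sketch (illustrative only; the real callers are the macro
 * expansions further down): the statement block is passed as a single
 * macro argument and executes with interrupts masked, e.g.
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 */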

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
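
/*
 * Note on the three variants above: a non-SMP kernel only needs a
 * compiler barrier, "dmb" is the ARMv7 Data Memory Barrier instruction,
 * and the CP15 c7/c10/5 operation is the ARMv6 equivalent of it.  The
 * LDREX/STREX based __sync_* routines further down call do_sync() before
 * their load/store loops, in line with the full-barrier policy described
 * above.
 */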

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */
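
/*
 * For illustration (a sketch, not part of this file's interface): a C11
 * caller such as
 *
 *	static _Atomic uint32_t cnt;
 *	...
 *	atomic_fetch_add_explicit(&cnt, 1, memory_order_relaxed);
 *
 * is lowered by the compiler on these targets to a call to
 * __atomic_fetch_add_4(), which the EMIT_FETCH_OP_N() expansion below
 * provides.
 */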

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
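
/*
 * (Sketch of the mechanism, as used by the sequences below: each sequence
 * stores its start address at ARM_RAS_START and its end address at
 * ARM_RAS_START + 4.  If the thread is interrupted while the program
 * counter lies inside that window, the kernel rolls it back to the start,
 * so the load/modify/store behaves atomically with respect to the other
 * threads sharing this single CPU.)
 */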

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif
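
/*
 * Note on the "_c" suffix (descriptive only): the functions below are
 * defined under private "_c" names and then exposed under the builtin
 * names, either through the redefine_extname pragmas above when building
 * with clang, or through the __strong_reference() aliases further down
 * when building with GCC, since clang does not let us define the builtins
 * directly.
 */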

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
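
/*
 * For example (illustrative): given a uint16_t at address 0x1002,
 * round_to_word() returns 0x1000 and the byte offset within that word
 * ((intptr_t)ptr & 3, as used by put_2()/get_2() below) is 2.
 */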

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
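
/*
 * Worked example (illustrative, little-endian): exchanging the 8-bit
 * value 0x5a at byte offset 1 of its containing word uses
 * val32 = 0x00005a00 and negmask = 0xffff00ff, so the word written back
 * is (old & negmask) | val32.  The STREX status register is 0 on success
 * and nonzero if the exclusive reservation was lost, hence the cmp/bne
 * retry loops.
 */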

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */