/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 275564 2014-12-06 11:59:35Z andrew $");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
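
/*
 * Usage sketch (illustrative): the routines below wrap their critical
 * section in this macro, e.g.
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem = val;
 *	});
 *
 * which saves the interrupt state in "regs", runs the block exactly once,
 * and restores the previous state afterwards.
 */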

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N
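
/*
 * For reference (expansion sketch): EMIT_ALL_OPS_N(4, uint32_t) above
 * generates, among others, the following compare-and-exchange routine:
 *
 *	_Bool
 *	__atomic_compare_exchange_4(uint32_t *mem, uint32_t *expected,
 *	    uint32_t desired, int success __unused, int failure __unused)
 *	{
 *		_Bool ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			if (*mem == *expected) {
 *				*mem = desired;
 *				ret = 1;
 *			} else {
 *				*expected = *mem;
 *				ret = 0;
 *			}
 *		});
 *		return (ret);
 *	}
 */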

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
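
/*
 * Sketch of the mechanism (illustrative): every sequence below first
 * publishes its own boundaries,
 *
 *	[ARM_RAS_START]     = address of local label 1 (sequence start)
 *	[ARM_RAS_START + 4] = address of local label 2 (sequence end)
 *
 * performs the load/modify/store, and then tears the window down by
 * storing 0 and 0xffffffff. If the thread is preempted while its PC lies
 * inside the registered window, the kernel rewinds it to the recorded
 * start address, so the sequence always appears to execute atomically on
 * the single CPU.
 */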

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N
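
/*
 * Usage sketch (illustrative, hypothetical names): with these entry points
 * in place, ordinary C11 atomics in ARMv5 userspace resolve to the
 * RAS-based routines above, e.g.
 *
 *	#include <stdatomic.h>
 *
 *	static _Atomic uint32_t counter;
 *
 *	uint32_t
 *	counter_bump(void)
 *	{
 *		return (atomic_fetch_add_explicit(&counter, 1,
 *		    memory_order_seq_cst));
 *	}
 *
 * where the fetch-and-add is emitted as a call to __atomic_fetch_add_4(),
 * assuming the compiler falls back to libcalls for this target.
 */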

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
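
/*
 * For example, a uint16_t stored at address 0x20006 lives in the 32-bit
 * word at 0x20004; masking off the two low address bits recovers that
 * word's address.
 */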

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
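
/*
 * Sketch (illustrative): the routines below never touch the sub-word
 * object directly. They first build 32-bit images of their operands,
 *
 *	val32.v32 = 0x00000000;
 *	put_1(&val32, mem, val);
 *
 * which places "val" in the byte lane that "mem" occupies within its
 * containing word, and afterwards extract the result from the same lane
 * with get_1()/get_2().
 */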

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
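
/*
 * Worked example (little-endian, illustrative): exchanging the byte at
 * byte offset 1 within its word with the value 0x5a uses
 *
 *	val32.v32   = 0x00005a00	(new byte in its lane, zeroes elsewhere)
 *	negmask.v32 = 0xffff00ff	(ones everywhere except that lane)
 *
 * and the ldrex/strex loop computes
 *
 *	new = (old & negmask.v32) | val32.v32
 *
 * so only the target byte changes while the neighbouring bytes are
 * preserved.
 */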

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
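
/*
 * Usage sketch (illustrative, hypothetical variable): with an old GCC
 * that cannot expand sub-word atomics inline, a call such as
 *
 *	static uint16_t refs;
 *
 *	old = __sync_fetch_and_add(&refs, 1);
 *
 * is emitted as a call to __sync_fetch_and_add_2(), which the pragmas
 * above (clang) or the __strong_reference() aliases below (GCC) bind to
 * the __sync_fetch_and_add_2_c() implementation defined here.
 */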

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */