/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 251695 2013-06-13 18:46:49Z ed $");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#ifndef SMP
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* !SMP */
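
/*
 * Illustrative use of the macro above (a sketch only; `counter' is a
 * hypothetical variable, and the real callers appear further down in
 * this file): the statement block runs with interrupts masked, which
 * is enough to make it atomic on a uniprocessor.
 *
 *	static uint32_t counter;
 *	uint32_t v;
 *
 *	WITHOUT_INTERRUPTS({
 *		v = counter;
 *		counter = v + 1;
 *	});
 */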

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

static inline void
do_sync(void)
{

#if defined(_KERNEL) && !defined(SMP)
	__asm volatile ("" : : : "memory");
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
	__asm volatile ("dmb" : : : "memory");
#else /* __ARM_ARCH_6__ */
	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
#endif
}

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#elif defined(_KERNEL)

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{

	do_sync();
}

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
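
/*
 * Illustrative caller (a sketch only; `nfailures' is a hypothetical
 * variable): with <stdatomic.h>, code such as
 *
 *	static _Atomic(uint32_t) nfailures;
 *	...
 *	atomic_fetch_add_explicit(&nfailures, 1, memory_order_relaxed);
 *
 * is typically lowered by the compiler to a call to
 * __atomic_fetch_add_4() when it cannot expand the operation inline,
 * which on these uniprocessor configurations ends up in the
 * interrupt-disabling implementation emitted above.
 */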

#else /* !__ARM_ARCH_6__ && !__ARM_ARCH_7__ && !_KERNEL */

/* XXX: Implement intrinsics for ARMv5 userspace. */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

/*
 * Old __sync_* API.
 */

#if defined(__SYNC_ATOMICS)

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
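
/*
 * For example (addresses are illustrative), an atomic uint8_t at
 * address 0x1003 and an atomic uint16_t at address 0x1002 both live
 * inside the 32-bit word at address 0x1000, which is the word the
 * LDREX/STREX instructions below will operate on.
 */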

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
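
/*
 * As an example of the masking scheme used below (addresses and values
 * are illustrative, and a little-endian configuration is assumed):
 * storing the 8-bit value 0x5a at address 0x1001 operates on the
 * containing word at 0x1000 with val32.v32 == 0x00005a00 and
 * negmask.v32 == 0xffff00ff, so the LDREX/STREX loop can clear the old
 * byte (AND with negmask) and merge in the new one (ORR with val32)
 * without disturbing the neighbouring bytes.
 */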

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
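
/*
 * A note on the inline assembly above (and in the variants that
 * follow): operand %0 receives the word loaded by LDREX, %1 and the
 * final "m" input both name the target memory word, %2 is a scratch
 * register used to build the value to be stored, %3 receives the
 * STREX status flag, and the remaining "r" inputs carry the values
 * and masks prepared before the loop.
 */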

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, negmask, old;		\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask.v32 = ~posmask.v32;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, negmask, old;				\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask.v32 = ~posmask.v32;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32),			\
		  "r" (negmask.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
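
/*
 * The "idempotence" argument above selects the filler value for the
 * bytes of the word that do not belong to the atomic variable: for AND
 * those bytes must be all ones (since x & 0xff == x for a byte), while
 * for OR and XOR they must be zero (since x | 0 == x and x ^ 0 == x),
 * so the bytes next to the variable pass through the operation
 * unchanged.
 */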

/*
 * 32-bit routines.
 */

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp1, temp2;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %6\n"	/* Load old value. */
		"\tcmp   %0, %4\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tmov   %2, %5\n"	/* Value to store. */
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */
		"\tcmp   %3, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp1), "=&r" (temp2)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4(uint32_t *mem, uint32_t val)				\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(lock_test_and_set, "mov %2, %4")
EMIT_FETCH_AND_OP_4(fetch_and_add, "add %2, %0, %4")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %4")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr %2, %0, %4")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub %2, %0, %4")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor %2, %0, %4")
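
/*
 * Illustrative caller (a sketch only; `refcount' is a hypothetical
 * variable): with an old compiler that lacks inline atomics, code
 * such as
 *
 *	static volatile uint32_t refcount;
 *	...
 *	old = __sync_fetch_and_add(&refcount, 1);
 *
 * is typically lowered to a call to __sync_fetch_and_add_4(), i.e. the
 * LDREX/STREX retry loop emitted above, which keeps retrying until the
 * store-exclusive succeeds.
 */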

#elif defined(_KERNEL)

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)

#else /* !__ARM_ARCH_6__ && !__ARM_ARCH_7__ && !_KERNEL */

/* XXX: Implement intrinsics for ARMv5 userspace. */

#endif

#endif /* __SYNC_ATOMICS */