/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/mips/mips/stdatomic.c 274668 2014-11-18 17:06:56Z imp $");

#include <sys/stdatomic.h>
#include <sys/types.h>

#if defined(__SYNC_ATOMICS)

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

static inline void
do_sync(void)
{

	__asm volatile (
#if !defined(_KERNEL) || defined(SMP)
		".set noreorder\n"
		"\tsync\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		".set reorder\n"
#else /* _KERNEL && !SMP */
		""
#endif /* !_KERNEL || SMP */
		: : : "memory");
}
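
/*
 * Illustrative note (not part of the original code): with GCC's __sync
 * interface, a C11 exchange such as
 *
 *	uint32_t prev = atomic_exchange_explicit(&a, 1, memory_order_seq_cst);
 *
 * ends up in __sync_lock_test_and_set_4(&a, 1), which by itself only
 * promises acquire semantics.  Calling do_sync() ahead of the LL/SC
 * loops below is what gives these routines the stronger ordering the
 * comment above refers to as a full barrier.
 */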

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
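
/*
 * Worked example (illustrative addresses, not part of the original
 * code): for a uint16_t at address 0x80000406, round_to_word() masks
 * off the two low address bits and returns (uint32_t *)0x80000404; the
 * variable then occupies byte offset 2 within that aligned word, which
 * is the offset put_2() and get_2() below compute from the pointer.
 */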

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words, at the byte offset corresponding to the location of
 * the atomic variable within its containing word.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %5\n"	/* Load old value. */		\
		"\tand	%2, %4, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %3\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
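
/*
 * Worked example (illustrative, assuming a little-endian configuration;
 * not part of the original code): exchanging the uint8_t at address
 * 0x80000401 with the value 0xab yields
 *
 *	mem32       = (uint32_t *)0x80000400
 *	val32.v32   = 0x0000ab00	(new byte placed in lane 1)
 *	negmask.v32 = 0xffff00ff	(lane 1 cleared, others kept)
 *
 * so the LL/SC loop stores (old & negmask) | val32 and get_1() then
 * extracts the previous contents of lane 1 as the return value.
 */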

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)			\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp;						\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %7\n"	/* Load old value. */		\
		"\tand	%2, %5, %0\n"	/* Isolate the old value. */	\
		"\tbne	%2, %3, 2f\n"	/* Compare to expected value. */\
		"\tand	%2, %6, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %4\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp)		\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
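
/*
 * Equivalent C semantics of the 1-byte instantiation above, as a sketch
 * (illustrative only; the real routine performs this atomically on the
 * containing 32-bit word, comparing just the lanes selected by posmask):
 *
 *	uint8_t old = *mem;
 *	if (old == expected)
 *		*mem = desired;
 *	return (old);
 */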

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %7\n"	/* Load old value. */		\
		"\t"op"	%2, %0, %4\n"	/* Calculate new value. */	\
		"\tand	%2, %5\n"	/* Isolate the new value. */	\
		"\tand	%3, %6, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %3\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "subu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "subu")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %4\n"	/* Load old value. */		\
		"\t"op"	%2, %3, %0\n"	/* Calculate new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "xor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "xor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tll	%0, %5\n"	/* Load old value. */
		"\tbne	%0, %3, 2f\n"	/* Compare to expected value. */
		"\tmove	%2, %4\n"	/* Value to store. */
		"\tsc	%2, %1\n"	/* Attempt to store. */
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4(uint32_t *mem, uint32_t val)				\
{									\
	uint32_t old, temp;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %4\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_4(fetch_and_add, "addu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "subu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "xor %2, %0, %3")
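
/*
 * For reference, a sketch (with symbolic register names, not the literal
 * preprocessor output) of the loop the fetch_and_add instantiation above
 * expands to:
 *
 *	1:	ll	old, (mem)		# load linked
 *		addu	temp, old, val		# compute old + val
 *		sc	temp, (mem)		# store conditional
 *		beqz	temp, 1b		# retry if the SC failed
 *
 * The function returns the value observed by the LL, i.e. the contents
 * of the word before the addition.
 */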

/*
 * 64-bit routines.
 *
 * Note: All the 64-bit atomic operations are only atomic when running
 * in 64-bit mode. It is assumed that code compiled for n32 and n64 fits
 * into this definition and no further safeties are needed.
 */

#if defined(__mips_n32) || defined(__mips_n64)

uint64_t
__sync_val_compare_and_swap_8(uint64_t *mem, uint64_t expected,
    uint64_t desired)
{
	uint64_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tlld	%0, %5\n"	/* Load old value. */
		"\tbne	%0, %3, 2f\n"	/* Compare to expected value. */
		"\tmove	%2, %4\n"	/* Value to store. */
		"\tscd	%2, %1\n"	/* Attempt to store. */
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_8(name, op)					\
uint64_t								\
__sync_##name##_8(uint64_t *mem, uint64_t val)				\
{									\
	uint64_t old, temp;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tlld	%0, %4\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tscd	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_8(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_8(fetch_and_add, "daddu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_sub, "dsubu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_xor, "xor %2, %0, %3")

#endif /* __mips_n32 || __mips_n64 */

#endif /* __SYNC_ATOMICS */