/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/mips/mips/stdatomic.c 251559 2013-06-08 23:45:11Z ed $");

#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

static inline void
mips_sync(void)
{

	__asm volatile (
#if !defined(_KERNEL) || defined(SMP)
		".set noreorder\n"
		"\tsync\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		".set reorder\n"
#else /* _KERNEL && !SMP */
		""
#endif /* !_KERNEL || SMP */
		: : : "memory");
}
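
/*
 * Illustrative sketch of how an exchange operation from <stdatomic.h>
 * can end up using the routines below (hypothetical helper name; the
 * actual lowering is performed by the compiler):
 *
 *	static inline uint32_t
 *	example_exchange_32(uint32_t *obj, uint32_t val)
 *	{
 *
 *		return (__sync_lock_test_and_set_4(obj, val));
 *	}
 *
 * Because every routine below issues mips_sync() before its LL/SC loop,
 * such an exchange behaves as a full barrier rather than only an
 * acquire operation, as described in the comment above.
 */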

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
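
/*
 * For example, a naturally aligned uint16_t at address 0x1006 lies
 * entirely within the 32-bit word at address 0x1004, so round_to_word()
 * simply clears the two low address bits:
 *
 *	0x1006 & ~3 == 0x1004
 *
 * The LL/SC loops below then operate on that containing word.
 */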

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding to the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
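
/*
 * For example, if r.v32 holds the in-memory image of the word at
 * address 0x1004, then get_2(&r, (uint16_t *)0x1006) reads r.v8[2] and
 * r.v8[3] and reassembles them in host byte order, yielding the same
 * value an ordinary 16-bit load from address 0x1006 would.  Because the
 * indices are byte offsets within the word, this works for both big-
 * and little-endian configurations.
 */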

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	mips_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %5\n"	/* Load old value. */		\
		"\tand	%2, %4, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %3\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
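
/*
 * Worked example: for __sync_lock_test_and_set_1() on a byte that lives
 * at offset 2 within its containing word, the register images built
 * above are (byte lanes listed by memory offset):
 *
 *	val32   = { 0x00, 0x00, val,  0x00 }
 *	negmask = { 0xff, 0xff, 0x00, 0xff }
 *
 * The LL/SC loop then stores (old & negmask) | val32, which replaces
 * only the byte of interest, and get_1() extracts that byte's previous
 * contents from the loaded word.
 */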

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, negmask, old;		\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask.v32 = ~posmask.v32;					\
									\
	mips_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %7\n"	/* Load old value. */		\
		"\tand	%2, %5, %0\n"	/* Isolate the old value. */	\
		"\tbne	%2, %3, 2f\n"	/* Compare to expected value. */\
		"\tand	%2, %6, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %4\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
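
/*
 * Illustrative sketch: a boolean compare-and-swap can be layered on top
 * of the value-returning version above.  The helper below uses a
 * hypothetical name and is not part of the interface this file
 * provides; it merely demonstrates the calling convention.
 */

static inline int
example_bool_compare_and_swap_1(uint8_t *mem, uint8_t expected,
    uint8_t desired)
{

	/* The swap succeeded iff the old value matched the expected one. */
	return (__sync_val_compare_and_swap_1(mem, expected, desired) ==
	    expected);
}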

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, negmask, old;				\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask.v32 = ~posmask.v32;					\
									\
	mips_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %7\n"	/* Load old value. */		\
		"\t"op"	%2, %0, %4\n"	/* Calculate new value. */	\
		"\tand	%2, %5\n"	/* Isolate the new value. */	\
		"\tand	%3, %6, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %3\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32),			\
		  "r" (negmask.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "subu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "subu")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	mips_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %4\n"	/* Load old value. */		\
		"\t"op"	%2, %3, %0\n"	/* Calculate new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "xor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "xor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	mips_sync();
	__asm volatile (
		"1:"
		"\tll	%0, %5\n"	/* Load old value. */
		"\tbne	%0, %3, 2f\n"	/* Compare to expected value. */
		"\tmove	%2, %4\n"	/* Value to store. */
		"\tsc	%2, %1\n"	/* Attempt to store. */
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4(uint32_t *mem, uint32_t val)				\
{									\
	uint32_t old, temp;						\
									\
	mips_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %4\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_4(fetch_and_add, "addu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "subu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "xor %2, %0, %3")

/*
 * 64-bit routines.
 *
 * Note: All the 64-bit atomic operations are only atomic when running
 * in 64-bit mode. It is assumed that code compiled for the n32 and n64
 * ABIs always runs in 64-bit mode, so no further safeguards are needed.
 */

#if defined(__mips_n32) || defined(__mips_n64)

uint64_t
__sync_val_compare_and_swap_8(uint64_t *mem, uint64_t expected,
    uint64_t desired)
{
	uint64_t old, temp;

	mips_sync();
	__asm volatile (
		"1:"
		"\tlld	%0, %5\n"	/* Load old value. */
		"\tbne	%0, %3, 2f\n"	/* Compare to expected value. */
		"\tmove	%2, %4\n"	/* Value to store. */
		"\tscd	%2, %1\n"	/* Attempt to store. */
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_8(name, op)					\
uint64_t								\
__sync_##name##_8(uint64_t *mem, uint64_t val)				\
{									\
	uint64_t old, temp;						\
									\
	mips_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tlld	%0, %4\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tscd	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_8(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_8(fetch_and_add, "daddu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_sub, "dsubu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_xor, "xor %2, %0, %3")

#endif /* __mips_n32 || __mips_n64 */
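
/*
 * Usage sketch: a caller that performs, for example,
 *
 *	uint16_t counter;
 *	...
 *	__sync_fetch_and_add(&counter, 1);
 *
 * may have the builtin lowered by the compiler to a call to the
 * __sync_fetch_and_add_2() routine above, which performs the update
 * with a 32-bit LL/SC loop on the word containing the counter.  Whether
 * a library call or an inline sequence is emitted depends on the
 * compiler and target configuration.
 */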