/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/stdatomic.h>
#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

#if defined(__SYNC_ATOMICS)

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

static inline void
do_sync(void)
{

	__asm volatile (
#if !defined(_KERNEL) || defined(SMP)
		".set noreorder\n"
		"\tsync\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		"\tnop\n"
		".set reorder\n"
#else /* _KERNEL && !SMP */
		""
#endif /* !_KERNEL || SMP */
		: : : "memory");
}

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
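
/*
 * For illustration (hypothetical address): an 8-bit variable at address
 * 0x1002 lives in the aligned 32-bit word at 0x1000, so round_to_word()
 * simply strips the two low address bits (0x1002 & ~3 == 0x1000). All
 * of the ll/sc loops below operate on that aligned word.
 */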

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
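
/*
 * For illustration, continuing the hypothetical example above: a
 * uint8_t at address 0x1002 uses byte lane v8[2] of a reg_t holding
 * the word at 0x1000. The lane index is derived from the byte address
 * itself and put_2()/get_2() copy the bytes in native order, so the
 * same code works on big- and little-endian CPUs without any explicit
 * shift counts.
 */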

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %5\n"	/* Load old value. */		\
		"\tand	%2, %4, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %3\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
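
/*
 * For illustration: exchanging a uint8_t that sits in byte lane 2 of
 * its word builds val32 with the new byte in lane 2 and zeroes
 * elsewhere, and negmask with 0x00 in lane 2 and 0xff elsewhere.
 * Inside the ll/sc loop the "and" keeps the three untouched lanes of
 * the loaded word and the "or" deposits the new byte, so neighbouring
 * variables that share the word are never modified.
 */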

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp;						\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %7\n"	/* Load old value. */		\
		"\tand	%2, %5, %0\n"	/* Isolate the old value. */	\
		"\tbne	%2, %3, 2f\n"	/* Compare to expected value. */\
		"\tand	%2, %6, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %4\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp)		\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
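
/*
 * For illustration: a compare-and-swap on a uint16_t occupying byte
 * lanes 2-3 builds posmask with 0xffff in those lanes. The loop masks
 * the loaded word down to the target lanes and compares the result
 * against expected32; on a mismatch it branches past the sc, so the
 * word is left untouched and the caller gets back the current value.
 */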

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %7\n"	/* Load old value. */		\
		"\t"op"	%2, %0, %4\n"	/* Calculate new value. */	\
		"\tand	%2, %5\n"	/* Isolate the new value. */	\
		"\tand	%3, %6, %0\n"	/* Remove the old value. */	\
		"\tor	%2, %3\n"	/* Put in the new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "subu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "subu")
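
/*
 * For illustration: the extra "and %2, posmask" step is what gives the
 * narrow types their wraparound semantics. Adding 1 to a byte lane
 * that holds 0xff produces 0x100 in the 32-bit sum; masking with
 * posmask truncates that back to 0x00 before the untouched lanes of
 * the old word are merged in, so the carry never leaks into a
 * neighbouring variable.
 */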

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp;							\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %4\n"	/* Load old value. */		\
		"\t"op"	%2, %3, %0\n"	/* Calculate new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp)		\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "xor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "xor", 0)
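
/*
 * For illustration: the bitwise routines need no separate mask/merge
 * step because the lanes outside the target are combined with the
 * identity element of the operation. "x & 0xff" leaves x unchanged, so
 * fetch_and_and passes idempotence 1 and starts val32 as 0xffffffff;
 * "x | 0" and "x ^ 0" also leave x unchanged, so fetch_and_or and
 * fetch_and_xor start from 0x00000000. The operation can therefore be
 * applied to the whole 32-bit word without disturbing its neighbours.
 */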

/*
 * 32-bit routines.
 */

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tll	%0, %5\n"	/* Load old value. */
		"\tbne	%0, %3, 2f\n"	/* Compare to expected value. */
		"\tmove	%2, %4\n"	/* Value to store. */
		"\tsc	%2, %1\n"	/* Attempt to store. */
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4(uint32_t *mem, uint32_t val)				\
{									\
	uint32_t old, temp;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tll	%0, %4\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tsc	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_4(fetch_and_add, "addu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "subu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "xor %2, %0, %3")
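
/*
 * For illustration (hypothetical caller, not part of this file): with
 * the __SYNC_ATOMICS flavour of <sys/stdatomic.h>, a statement such as
 *
 *	atomic_fetch_add_explicit(&counter, 1, memory_order_seq_cst);
 *
 * on a 32-bit atomic is expected to be lowered by the compiler to
 * __sync_fetch_and_add_4(&counter, 1) and end up in the routine
 * emitted above.
 */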

/*
 * 64-bit routines.
 *
 * Note: All the 64-bit atomic operations are only atomic when running
 * in 64-bit mode. It is assumed that code compiled for n32 and n64 fits
 * into this definition and no further safeties are needed.
 */

#if defined(__mips_n32) || defined(__mips_n64)

uint64_t
__sync_val_compare_and_swap_8(uint64_t *mem, uint64_t expected,
    uint64_t desired)
{
	uint64_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tlld	%0, %5\n"	/* Load old value. */
		"\tbne	%0, %3, 2f\n"	/* Compare to expected value. */
		"\tmove	%2, %4\n"	/* Value to store. */
		"\tscd	%2, %1\n"	/* Attempt to store. */
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_8(name, op)					\
uint64_t								\
__sync_##name##_8(uint64_t *mem, uint64_t val)				\
{									\
	uint64_t old, temp;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tlld	%0, %4\n"	/* Load old value. */		\
		"\t"op"\n"		/* Calculate new value. */	\
		"\tscd	%2, %1\n"	/* Attempt to store. */		\
		"\tbeqz	%2, 1b\n"	/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_8(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_8(fetch_and_add, "daddu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_sub, "dsubu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_xor, "xor %2, %0, %3")

#endif /* __mips_n32 || __mips_n64 */

#endif /* __SYNC_ATOMICS */