/*	$OpenBSD: sync.S,v 1.3 2017/08/20 11:12:42 visa Exp $	*/

/*
 * Copyright (c) 2015 Visa Hankala
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * The hardware can do 4-byte and 8-byte atomic operations directly through
 * the ll/sc and lld/scd instructions. 1-byte and 2-byte atomic operations
 * have to be emulated. The emulation uses 4-byte atomic updates where the
 * change is confined to the desired 1-byte or 2-byte subword.
 */
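
/*
 * As a rough C illustration of the technique (the routines below are the
 * real, hand-written assembly), a 1- or 2-byte update becomes a 4-byte
 * ll/sc loop on the containing word:
 *
 *	uint32_t *word = (uint32_t *)(addr & ~(uintptr_t)amask);
 *	uint32_t oldw, neww;
 *	do {
 *		oldw = *word;				(ll)
 *		neww = (oldw & ~mask) | (result & mask);	(splice subword)
 *	} while (!store_conditional(word, neww));	(sc)
 *
 * Here mask covers only the addressed subword, result is the shifted
 * outcome of the operation, and store_conditional() is merely a stand-in
 * for the sc instruction.
 */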

	.set noreorder

#define LEAF(n) \
	.align 3; \
	.globl n; \
	.ent n, 0; \
n:

#define END(n) \
	.end n

/* Convert offset in memory to offset in machine word. */
#ifdef __MIPSEB__
#define GET_WORD_OFFSET(amask, r)	xori r, r, amask
#else
#define GET_WORD_OFFSET(amask, r)	/* nothing */
#endif
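
/*
 * Example with amask = 2 (2-byte operations): a halfword at byte offset 2
 * occupies bits 16..31 of its word on a little-endian CPU, so the memory
 * offset can be used as the shift base directly.  On a big-endian CPU the
 * same halfword is the least significant one, and xori with amask remaps
 * the offset (2 ^ 2 = 0, i.e. bit shift 0).
 */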

#define SYNC_EMUL_INIT(amask, vmask) \
	andi	$t0, $a0, amask;	/* Get byte offset. */ \
	xor	$a0, $a0, $t0;		/* Align the address. */ \
	GET_WORD_OFFSET(amask, $t0); \
	sll	$t0, $t0, 3;		/* Multiply by 8 to get bit shift. */ \
	li	$t1, vmask; \
	sll	$t1, $t1, $t0;		/* Make positive mask. */ \
	nor	$t2, $t1, $0		/* Make negative mask. */
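
/*
 * Worked example (little-endian, 1-byte operation, amask = 3, vmask = 0xff,
 * $a0 = 0x1006):
 *
 *	t0 = 0x1006 & 3 = 2			byte offset
 *	a0 = 0x1006 ^ 2 = 0x1004		aligned word address
 *	t0 = 2 << 3 = 16			bit shift
 *	t1 = 0xff << 16 = 0x00ff0000		selects the target byte
 *	t2 = ~t1 = 0xff00ffff			keeps the other bytes
 *
 * The upper half of the 64-bit t2 is all ones as well, which is harmless
 * because sc stores only the low 32 bits.
 */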

#define NO_NEG
#define NEG_v0	nor $v0, $v0, $0
#define NEG_v1	nor $v1, $v1, $0

#define NO_TRUNC
#define TRUNC_32(r, t) \
	lui	t, 0xffff; \
	ori	t, t, 0xffff; \
	and	r, r, t

/*
 * type __sync_fetch_and_<op>_<N>(type *ptr, type value)
 */
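
/*
 * Each routine atomically performs { old = *ptr; *ptr = old <op> value; }
 * and returns the old value, with a full barrier (sync) on both sides.
 * For nand the stored value is ~(old & value), per the GCC builtin
 * semantics: the and instruction computes the intermediate result and
 * NEG_v1 complements it before the store.
 */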

#define SYNC_FETCH_AND_OP(op, n, ll, sc, inst, neg) \
LEAF(__sync_fetch_and_##op##_##n); \
	sync; \
1:	ll	$v0, ($a0); \
	inst	$v1, $v0, $a1; \
	neg; \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	j	$ra; \
	 nop; \
END(__sync_fetch_and_##op##_##n)
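
/*
 * sc/scd write 1 to their source register on success and 0 when the
 * reservation taken by ll/lld has been lost, in which case the beq loops
 * back and the whole read-modify-write sequence is retried.
 */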

SYNC_FETCH_AND_OP(add, 8, lld, scd, daddu, NO_NEG)
SYNC_FETCH_AND_OP(sub, 8, lld, scd, dsubu, NO_NEG)
SYNC_FETCH_AND_OP(or, 8, lld, scd, or, NO_NEG)
SYNC_FETCH_AND_OP(and, 8, lld, scd, and, NO_NEG)
SYNC_FETCH_AND_OP(xor, 8, lld, scd, xor, NO_NEG)
SYNC_FETCH_AND_OP(nand, 8, lld, scd, and, NEG_v1)

SYNC_FETCH_AND_OP(add, 4, ll, sc, addu, NO_NEG)
SYNC_FETCH_AND_OP(sub, 4, ll, sc, subu, NO_NEG)
SYNC_FETCH_AND_OP(or, 4, ll, sc, or, NO_NEG)
SYNC_FETCH_AND_OP(and, 4, ll, sc, and, NO_NEG)
SYNC_FETCH_AND_OP(xor, 4, ll, sc, xor, NO_NEG)
SYNC_FETCH_AND_OP(nand, 4, ll, sc, and, NEG_v1)

#define SYNC_FETCH_AND_OP_EMUL(op, n, inst, neg, amask, vmask) \
LEAF(__sync_fetch_and_##op##_##n); \
	SYNC_EMUL_INIT(amask, vmask); \
	sll	$a1, $a1, $t0;		/* Align the parameter. */ \
	and	$a1, $a1, $t1;		/* Normalize the parameter. */ \
	sync; \
1:	ll	$v0, ($a0); \
	inst	$v1, $v0, $a1; \
	neg; \
	and	$v1, $v1, $t1;		/* Get the new bits. */ \
	and	$t3, $v0, $t2;		/* Get the old bits. */ \
	or	$v1, $v1, $t3;		/* Combine the result. */ \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	and	$v0, $v0, $t1;		/* Get the old value. */ \
	j	$ra; \
	 srl	$v0, $v0, $t0;		/* Remove the shift. */ \
END(__sync_fetch_and_##op##_##n)

SYNC_FETCH_AND_OP_EMUL(add, 2, addu, NO_NEG, 2, 0xffff)
SYNC_FETCH_AND_OP_EMUL(sub, 2, subu, NO_NEG, 2, 0xffff)
SYNC_FETCH_AND_OP_EMUL(or, 2, or, NO_NEG, 2, 0xffff)
SYNC_FETCH_AND_OP_EMUL(and, 2, and, NO_NEG, 2, 0xffff)
SYNC_FETCH_AND_OP_EMUL(xor, 2, xor, NO_NEG, 2, 0xffff)
SYNC_FETCH_AND_OP_EMUL(nand, 2, and, NEG_v1, 2, 0xffff)

SYNC_FETCH_AND_OP_EMUL(add, 1, addu, NO_NEG, 3, 0xff)
SYNC_FETCH_AND_OP_EMUL(sub, 1, subu, NO_NEG, 3, 0xff)
SYNC_FETCH_AND_OP_EMUL(or, 1, or, NO_NEG, 3, 0xff)
SYNC_FETCH_AND_OP_EMUL(and, 1, and, NO_NEG, 3, 0xff)
SYNC_FETCH_AND_OP_EMUL(xor, 1, xor, NO_NEG, 3, 0xff)
SYNC_FETCH_AND_OP_EMUL(nand, 1, and, NEG_v1, 3, 0xff)

/*
 * type __sync_<op>_and_fetch_<N>(type *ptr, type value)
 */
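
/*
 * Like the fetch-and-op routines above, except that the updated value is
 * returned instead of the old one: atomically { *ptr = *ptr <op> value; }
 * with the result of the operation handed back to the caller.
 */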

#define SYNC_OP_AND_FETCH(op, n, ll, sc, inst, neg) \
LEAF(__sync_##op##_and_fetch_##n); \
	sync; \
1:	ll	$v0, ($a0); \
	inst	$v0, $v0, $a1; \
	neg; \
	move	$v1, $v0; \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	j	$ra; \
	 nop; \
END(__sync_##op##_and_fetch_##n)

SYNC_OP_AND_FETCH(add, 8, lld, scd, daddu, NO_NEG)
SYNC_OP_AND_FETCH(sub, 8, lld, scd, dsubu, NO_NEG)
SYNC_OP_AND_FETCH(or, 8, lld, scd, or, NO_NEG)
SYNC_OP_AND_FETCH(and, 8, lld, scd, and, NO_NEG)
SYNC_OP_AND_FETCH(xor, 8, lld, scd, xor, NO_NEG)
SYNC_OP_AND_FETCH(nand, 8, lld, scd, and, NEG_v0)

SYNC_OP_AND_FETCH(add, 4, ll, sc, addu, NO_NEG)
SYNC_OP_AND_FETCH(sub, 4, ll, sc, subu, NO_NEG)
SYNC_OP_AND_FETCH(or, 4, ll, sc, or, NO_NEG)
SYNC_OP_AND_FETCH(and, 4, ll, sc, and, NO_NEG)
SYNC_OP_AND_FETCH(xor, 4, ll, sc, xor, NO_NEG)
SYNC_OP_AND_FETCH(nand, 4, ll, sc, and, NEG_v0)

#define SYNC_OP_AND_FETCH_EMUL(op, n, inst, neg, amask, vmask) \
LEAF(__sync_##op##_and_fetch_##n); \
	SYNC_EMUL_INIT(amask, vmask); \
	sll	$a1, $a1, $t0;		/* Align the parameter. */ \
	and	$a1, $a1, $t1;		/* Normalize the parameter. */ \
	sync; \
1:	ll	$v0, ($a0); \
	inst	$v1, $v0, $a1; \
	neg; \
	and	$t3, $v1, $t1;		/* Get the new bits. */ \
	and	$v0, $v0, $t2;		/* Get the old bits. */ \
	or	$v0, $v0, $t3;		/* Combine the result. */ \
	move	$v1, $v0; \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	j	$ra; \
	 srl	$v0, $t3, $t0;		/* Remove the shift. */ \
END(__sync_##op##_and_fetch_##n)

SYNC_OP_AND_FETCH_EMUL(add, 2, addu, NO_NEG, 2, 0xffff)
SYNC_OP_AND_FETCH_EMUL(sub, 2, subu, NO_NEG, 2, 0xffff)
SYNC_OP_AND_FETCH_EMUL(or, 2, or, NO_NEG, 2, 0xffff)
SYNC_OP_AND_FETCH_EMUL(and, 2, and, NO_NEG, 2, 0xffff)
SYNC_OP_AND_FETCH_EMUL(xor, 2, xor, NO_NEG, 2, 0xffff)
SYNC_OP_AND_FETCH_EMUL(nand, 2, and, NEG_v1, 2, 0xffff)

SYNC_OP_AND_FETCH_EMUL(add, 1, addu, NO_NEG, 3, 0xff)
SYNC_OP_AND_FETCH_EMUL(sub, 1, subu, NO_NEG, 3, 0xff)
SYNC_OP_AND_FETCH_EMUL(or, 1, or, NO_NEG, 3, 0xff)
SYNC_OP_AND_FETCH_EMUL(and, 1, and, NO_NEG, 3, 0xff)
SYNC_OP_AND_FETCH_EMUL(xor, 1, xor, NO_NEG, 3, 0xff)
SYNC_OP_AND_FETCH_EMUL(nand, 1, and, NEG_v1, 3, 0xff)

/*
 * bool __sync_bool_compare_and_swap_<N>(type *ptr, type oldv, type newv)
 */
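
/*
 * Atomically compare *ptr with oldv and, if they are equal, store newv.
 * Returns 1 when the store happened and 0 otherwise.
 */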

#define SYNC_BOOL_COMPARE_AND_SWAP(n, ll, sc, trunc) \
LEAF(__sync_bool_compare_and_swap_##n); \
	trunc; \
	sync; \
1:	ll	$v0, ($a0); \
	bne	$v0, $a1, 2f; \
	 move	$v1, $a2; \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	j	$ra; \
	 li	$v0, 1; \
2:	j	$ra; \
	 li	$v0, 0; \
END(__sync_bool_compare_and_swap_##n)

SYNC_BOOL_COMPARE_AND_SWAP(8, lld, scd, NO_TRUNC)
SYNC_BOOL_COMPARE_AND_SWAP(4, ll, sc, TRUNC_32($a1, $t0))

#define SYNC_BOOL_COMPARE_AND_SWAP_EMUL(n, amask, vmask) \
LEAF(__sync_bool_compare_and_swap_##n); \
	SYNC_EMUL_INIT(amask, vmask); \
	/* Align and normalize the parameters. */ \
	sll	$a1, $a1, $t0; \
	and	$a1, $a1, $t1; \
	sll	$a2, $a2, $t0; \
	and	$a2, $a2, $t1; \
	/* Do the update. */ \
	sync; \
1:	ll	$v0, ($a0); \
	and	$v1, $v0, $t1;		/* Get the old value. */ \
	bne	$v1, $a1, 2f; \
	 and	$v1, $v0, $t2;		/* Clear the old value. */ \
	or	$v1, $v1, $a2;		/* Insert the new value. */ \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	j	$ra; \
	 li	$v0, 1; \
2:	j	$ra; \
	 li	$v0, 0; \
END(__sync_bool_compare_and_swap_##n)

SYNC_BOOL_COMPARE_AND_SWAP_EMUL(2, 2, 0xffff)
SYNC_BOOL_COMPARE_AND_SWAP_EMUL(1, 3, 0xff)

/*
 * type __sync_val_compare_and_swap_<N>(type *ptr, type oldv, type newv)
 */
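
/*
 * Like the bool variant, but the contents of *ptr from before the
 * operation are returned whether or not the swap took place; the caller
 * can compare the result with oldv to tell whether it succeeded.
 */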

#define SYNC_VAL_COMPARE_AND_SWAP(n, ll, sc, trunc) \
LEAF(__sync_val_compare_and_swap_##n); \
	trunc; \
	sync; \
1:	ll	$v0, ($a0); \
	bne	$v0, $a1, 2f; \
	 move	$v1, $a2; \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
2:	j	$ra; \
	 nop; \
END(__sync_val_compare_and_swap_##n)

SYNC_VAL_COMPARE_AND_SWAP(8, lld, scd, NO_TRUNC)
SYNC_VAL_COMPARE_AND_SWAP(4, ll, sc, TRUNC_32($a1, $t0))

#define SYNC_VAL_COMPARE_AND_SWAP_EMUL(n, amask, vmask) \
LEAF(__sync_val_compare_and_swap_##n); \
	SYNC_EMUL_INIT(amask, vmask); \
	/* Align and normalize the parameters. */ \
	sll	$a1, $a1, $t0; \
	and	$a1, $a1, $t1; \
	sll	$a2, $a2, $t0; \
	and	$a2, $a2, $t1; \
	/* Do the update. */ \
	sync; \
1:	ll	$v0, ($a0); \
	and	$t3, $v0, $t1;		/* Get the old value. */ \
	bne	$t3, $a1, 2f; \
	 and	$v1, $v0, $t2;		/* Clear the old value. */ \
	or	$v1, $v1, $a2;		/* Insert the new value. */ \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
2:	j	$ra; \
	 srl	$v0, $t3, $t0;		/* Remove the shift. */ \
END(__sync_val_compare_and_swap_##n)

SYNC_VAL_COMPARE_AND_SWAP_EMUL(2, 2, 0xffff)
SYNC_VAL_COMPARE_AND_SWAP_EMUL(1, 3, 0xff)

/*
 * type __sync_lock_test_and_set_<N>(type *ptr, type value)
 */
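
/*
 * Atomic exchange: value is stored into *ptr and the previous contents
 * are returned.  GCC requires only an acquire barrier from this builtin;
 * the full sync barriers used here are stronger than necessary but safe.
 */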

#define SYNC_LOCK_TEST_AND_SET(n, ll, sc) \
LEAF(__sync_lock_test_and_set_##n); \
	sync; \
1:	move	$v1, $a1; \
	ll	$v0, ($a0); \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	j	$ra; \
	 nop; \
END(__sync_lock_test_and_set_##n)

SYNC_LOCK_TEST_AND_SET(8, lld, scd)
SYNC_LOCK_TEST_AND_SET(4, ll, sc)

#define SYNC_LOCK_TEST_AND_SET_EMUL(n, amask, vmask) \
LEAF(__sync_lock_test_and_set_##n); \
	SYNC_EMUL_INIT(amask, vmask); \
	sll	$a1, $a1, $t0;		/* Align the parameter. */ \
	and	$a1, $a1, $t1;		/* Normalize the parameter. */ \
	sync; \
1:	ll	$v0, ($a0); \
	and	$v1, $v0, $t2;		/* Clear the old value. */ \
	or	$v1, $v1, $a1;		/* Insert the new value. */ \
	sc	$v1, ($a0); \
	beq	$v1, $0, 1b; \
	 nop; \
	sync; \
	and	$v0, $v0, $t1;		/* Get the old value. */ \
	j	$ra; \
	 srl	$v0, $v0, $t0;		/* Remove the shift. */ \
END(__sync_lock_test_and_set_##n)

SYNC_LOCK_TEST_AND_SET_EMUL(2, 2, 0xffff)
SYNC_LOCK_TEST_AND_SET_EMUL(1, 3, 0xff)