/* TILE atomics.
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "tconfig.h"
#include "coretypes.h"
#include "atomic.h"

#define bool unsigned char

/* This code should be inlined by the compiler, but for now support
   it as out-of-line methods in libgcc.  */

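/* Issue __atomic_thread_fence (model) before the operation for models
   with release semantics (__ATOMIC_RELEASE, __ATOMIC_ACQ_REL,
   __ATOMIC_SEQ_CST), and after the operation for models with acquire
   semantics (__ATOMIC_ACQUIRE, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST);
   other models need no fence here.  */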
static inline void
pre_atomic_barrier (int model)
{
  switch (model)
    {
    case __ATOMIC_RELEASE:
    case __ATOMIC_ACQ_REL:
    case __ATOMIC_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

static inline void
post_atomic_barrier (int model)
{
  switch (model)
    {
    case __ATOMIC_ACQUIRE:
    case __ATOMIC_ACQ_REL:
    case __ATOMIC_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

#define __unused __attribute__((unused))

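/* __fetch_and_do generates one out-of-line fetch-and-<op> function;
   "top" and "bottom" are the barrier statements wrapped around the
   arch_atomic_<opname> primitive, which returns the old value.  For
   illustration, __atomic_fetch_and_do (int, 4, add) below expands to
   roughly:

     int __atomic_fetch_add_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = arch_atomic_add (p, i);
       post_atomic_barrier (model);
       return rv;
     }
*/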
#define __fetch_and_do(proto, type, size, opname, top, bottom)	\
proto								\
{								\
  top;								\
  type rv = arch_atomic_##opname(p, i);				\
  bottom;							\
  return rv;							\
}

#define __atomic_fetch_and_do(type, size, opname)			\
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
		 type, size, opname,					\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))				\

__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)

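/* The legacy __sync_* entry points are always full barriers: instead
   of the model-dependent fences above, they bracket the operation
   with the architecture's explicit write and read barriers.  */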
#define __sync_fetch_and_do(type, size, opname)				\
  __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
		 type, size, opname,					\
		 arch_atomic_write_barrier(),				\
		 arch_atomic_read_barrier())

__sync_fetch_and_do (int, 4, add)
__sync_fetch_and_do (int, 4, sub)
__sync_fetch_and_do (int, 4, or)
__sync_fetch_and_do (int, 4, and)
__sync_fetch_and_do (int, 4, xor)
__sync_fetch_and_do (int, 4, nand)
__sync_fetch_and_do (long long, 8, add)
__sync_fetch_and_do (long long, 8, sub)
__sync_fetch_and_do (long long, 8, or)
__sync_fetch_and_do (long long, 8, and)
__sync_fetch_and_do (long long, 8, xor)
__sync_fetch_and_do (long long, 8, nand)

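/* __do_and_fetch generates the <op>-and-fetch flavor: the new value
   is recomputed from the old value returned by the arch primitive,
   using the C operator "op" and an optional prefix "op2" (only nand
   needs it, to form ~(oldval & i)).  For illustration,
   __atomic_do_and_fetch (int, 4, nand, &, ~) below expands to
   roughly:

     int __atomic_nand_fetch_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = ~ (arch_atomic_nand (p, i) & i);
       post_atomic_barrier (model);
       return rv;
     }
*/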
#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom)	\
proto									\
{									\
  top;									\
  type rv = op2 (arch_atomic_##opname(p, i) op i);			\
  bottom;								\
  return rv;								\
}

#define __atomic_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
		 type, size, opname, op, op2,				\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))				\

__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)

#define __sync_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
		 type, size, opname, op, op2,				\
		 arch_atomic_write_barrier(),				\
		 arch_atomic_read_barrier())				\

__sync_do_and_fetch (int, 4, add, +, )
__sync_do_and_fetch (int, 4, sub, -, )
__sync_do_and_fetch (int, 4, or, |, )
__sync_do_and_fetch (int, 4, and, &, )
__sync_do_and_fetch (int, 4, xor, ^, )
__sync_do_and_fetch (int, 4, nand, &, ~)
__sync_do_and_fetch (long long, 8, add, +, )
__sync_do_and_fetch (long long, 8, sub, -, )
__sync_do_and_fetch (long long, 8, or, |, )
__sync_do_and_fetch (long long, 8, and, &, )
__sync_do_and_fetch (long long, 8, xor, ^, )
__sync_do_and_fetch (long long, 8, nand, &, ~)

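/* Word and doubleword exchange/compare-exchange.  For the __atomic_
   variants, *oldvalp is the expected value on entry; the value
   actually observed in memory is stored back through oldvalp (so on a
   miss the caller sees what is really there, as the
   __atomic_compare_exchange builtin requires), and the function
   returns whether the exchange happened.  */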
#define __atomic_exchange_methods(type, size)				\
bool									\
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp,	\
				 type newval, bool weak __unused,	\
				 int models, int modelf __unused)	\
{									\
  type oldval = *oldvalp;						\
  pre_atomic_barrier(models);						\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  post_atomic_barrier(models);						\
  bool success = (retval == oldval);					\
  *oldvalp = retval;							\
  return success;							\
}									\
									\
type									\
__atomic_exchange_##size(volatile type* ptr, type val, int model)	\
{									\
  pre_atomic_barrier(model);						\
  type retval = arch_atomic_exchange(ptr, val);				\
  post_atomic_barrier(model);						\
  return retval;							\
}

__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)

#define __sync_exchange_methods(type, size)				\
type									\
__sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval)	\
{									\
  arch_atomic_write_barrier();						\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();						\
  return retval;							\
}									\
									\
bool									\
__sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{									\
  arch_atomic_write_barrier();						\
  bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();						\
  return retval;							\
}									\
									\
type									\
__sync_lock_test_and_set_##size(type* ptr, type val)			\
{									\
  type retval = arch_atomic_exchange(ptr, val);				\
  arch_atomic_acquire_barrier_value(retval);				\
  return retval;							\
}

__sync_exchange_methods (int, 4)
__sync_exchange_methods (long long, 8)

#ifdef __LITTLE_ENDIAN__
#define BIT_OFFSET(n, type) ((n) * 8)
#else
#define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
#endif
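/* BIT_OFFSET(n, type) gives the bit position, within the containing
   aligned 4-byte word, of a subword value of the given type located
   n bytes into that word.  E.g. a u16 at byte offset 2 sits at bit 16
   on a little-endian target ((2) * 8) and at bit 0 on a big-endian
   one ((4 - 2 - 2) * 8).  */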

/* Subword methods require the same approach for both TILEPro and
   TILE-Gx.  We load the background data for the word, insert the
   desired subword piece, then compare-and-exchange it into place.  */
#define u8 unsigned char
#define u16 unsigned short

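/* __subword_cmpxchg_body implements the scheme described above: p is
   the enclosing aligned word, shift/valmask select the subword within
   it, and bgmask keeps the surrounding bytes.  For example, on a
   little-endian target a u8 at (address & 3) == 2 gives shift == 16,
   valmask == 0xff and bgmask == 0xff00ffff, so only byte 2 of the
   word is replaced.  The word-sized compare-and-exchange is attempted
   only if the subword currently holds the expected value, and the
   subword actually observed is the result.  */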
#define __subword_cmpxchg_body(type, size, ptr, guess, val)		\
  ({									\
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);	\
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);	\
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;		\
    const unsigned int bgmask = ~(valmask << shift);			\
    unsigned int oldword = *p;						\
    type oldval = (oldword >> shift) & valmask;				\
    if (__builtin_expect((oldval == guess), 1)) {			\
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      oldword = arch_atomic_val_compare_and_exchange(p, oldword, word);	\
      oldval = (oldword >> shift) & valmask;				\
    }									\
    oldval;								\
  })									\

#define __atomic_subword_cmpxchg(type, size)				\
  									\
bool									\
__atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr,	\
				 type val, bool weak __unused, int models, \
				 int modelf __unused)			\
{									\
  pre_atomic_barrier(models);						\
  type guess = *guess_ptr;						\
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);	\
  post_atomic_barrier(models);						\
  bool success = (oldval == guess);					\
  *guess_ptr = oldval;							\
  return success;							\
}

__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)

#define __sync_subword_cmpxchg(type, size)				\
  									\
type									\
__sync_val_compare_and_swap_##size(type* ptr, type guess, type val)	\
{									\
  arch_atomic_write_barrier();						\
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);	\
  arch_atomic_read_barrier();						\
  return oldval;							\
}									\
									\
bool									\
__sync_bool_compare_and_swap_##size(type* ptr, type guess, type val)	\
{									\
  type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val);	\
  return oldval == guess;						\
}

__sync_subword_cmpxchg (u8, 1)
__sync_subword_cmpxchg (u16, 2)

/* For the atomic-update subword methods, we use the same approach as
   above, but we retry until we succeed if the compare-and-exchange
   fails.  */
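/* __subword generates one such retrying function.  xword carries the
   word returned by the previous compare-and-exchange, so each retry
   reuses it as the new "old word" instead of re-reading memory;
   "expr" computes the new subword value from oldval and i (or is
   simply nval for the exchange methods further below).  */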
#define __subword(type, proto, top, expr, bottom)			\
proto									\
{									\
  top									\
  unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);	\
  const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);		\
  const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;		\
  const unsigned int bgmask = ~(valmask << shift);			\
  unsigned int oldword, xword = *p;					\
  type val, oldval;							\
  do {									\
    oldword = xword;							\
    oldval = (oldword >> shift) & valmask;				\
    val = expr;								\
    unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
    xword = arch_atomic_val_compare_and_exchange(p, oldword, word);	\
  } while (__builtin_expect(xword != oldword, 0));			\
  bottom								\
}

#define __atomic_subword_fetch(type, funcname, expr, retval)		\
  __subword(type,							\
	    type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
	    pre_atomic_barrier(model);,					\
	    expr,							\
	    post_atomic_barrier(model); return retval;)

__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)

__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)

__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)

__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)

#define __sync_subword_fetch(type, funcname, expr, retval)	\
  __subword(type,						\
	    type __sync_ ## funcname(type *ptr, type i),	\
	    arch_atomic_read_barrier();,			\
	    expr,						\
	    arch_atomic_write_barrier(); return retval;)

__sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
__sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
__sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
__sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
__sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
__sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)

__sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
__sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
__sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
__sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
__sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
__sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)

__sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
__sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
__sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
__sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
__sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
__sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)

__sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
__sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
__sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
__sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
__sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
__sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)

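/* Subword exchange: "expr" is simply the new value, so the loop swaps
   it in and the old subword is returned.  For illustration,
   __atomic_subword_lock (u8, 1) below provides roughly
   u8 __atomic_exchange_1 (volatile u8 *ptr, u8 nval, int model),
   bracketed by the model-dependent barriers.  */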
#define __atomic_subword_lock(type, size)				\
  __subword(type,							\
	    type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
	    pre_atomic_barrier(model);,					\
	    nval,							\
	    post_atomic_barrier(model); return oldval;)

__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)

#define __sync_subword_lock(type, size)					\
  __subword(type,							\
	    type __sync_lock_test_and_set_##size(type* ptr, type nval), \
	    ,								\
	    nval,							\
	    arch_atomic_acquire_barrier_value(oldval); return oldval;)

__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)