#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;
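
/*
 * These orderings mirror C11's memory_order_* set, minus
 * memory_order_consume, which this set of wrappers does not provide.
 */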

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
#  elif defined(__ppc__)
	asm volatile("lwsync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}
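
/*
 * Illustrative sketch (an assumption about usage, not code emitted by this
 * header): the generated load/store wrappers below pair atomic_fence() with
 * plain accesses to get acquire/release semantics.  A hand-rolled
 * publish/consume handshake built directly on atomic_fence() would look
 * roughly like:
 *
 *	// Writer: fill in the payload, then publish the flag.
 *	payload = compute();
 *	atomic_fence(atomic_memory_order_release);
 *	flag = 1;
 *
 *	// Reader: observe the flag, then it is safe to read the payload.
 *	while (flag == 0) { }
 *	atomic_fence(atomic_memory_order_acquire);
 *	use(payload);
 *
 * payload, flag, compute(), and use() are hypothetical names; real callers
 * go through the atomic_*_t wrappers generated below.
 */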

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
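/*
 * Concretely (an illustrative expansion of the wrappers generated below, not
 * additional code in this header), on a weakly ordered architecture a seq_cst
 * pair ends up as:
 *
 *	load:   full fence;        result = a->repr;  acquire fence;
 *	store:  release fence;     a->repr = val;     compiler barrier;
 *
 * while on x86 (strongly ordered) it collapses to:
 *
 *	load:   compiler barrier;  result = a->repr;  compiler barrier;
 *	store:  compiler barrier;  a->repr = val;     full fence;
 */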

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
#  else
	atomic_fence(atomic_memory_order_seq_cst);
#  endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
#  else
	atomic_fence(atomic_memory_order_relaxed);
#  endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
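
/*
 * Example instantiation (hypothetical; jemalloc's atomic.h performs the real
 * instantiations with its own type list and LG_SIZEOF_* values):
 *
 *	JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
 *
 * generates atomic_p_t plus atomic_load_p(), atomic_store_p(),
 * atomic_exchange_p(), and the weak/strong compare-exchange wrappers:
 *
 *	atomic_p_t slot = ATOMIC_INIT(NULL);
 *	void *prev = atomic_load_p(&slot, atomic_memory_order_acquire);
 *	atomic_store_p(&slot, ptr, atomic_memory_order_release);
 *
 * (slot and ptr are illustrative names.)  The memory-order arguments to the
 * exchange and compare-exchange wrappers are effectively ignored: the __sync
 * builtins are always full barriers, which is at least as strong as any
 * requested ordering.
 */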

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
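
/*
 * Example instantiation (hypothetical; see jemalloc's atomic.h for the real
 * ones):
 *
 *	JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
 *
 * adds the fetch_add/sub/and/or/xor wrappers on top of the basic set.  Like
 * C11's atomic_fetch_add, each returns the value held before the update:
 *
 *	static unsigned
 *	counter_incr(atomic_u_t *counter) {
 *		return atomic_fetch_add_u(counter, 1,
 *		    atomic_memory_order_relaxed);
 *	}
 *
 * counter_incr is an illustrative name.  As above, the mo argument is a
 * no-op here: the __sync fetch-and-op builtins are full barriers.
 */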

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */