/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/10.3/sys/amd64/include/atomic.h 254618 2013-08-21 22:05:58Z jkim $
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#define	mb()	__asm __volatile("mfence;" : : : "memory")
#define	wmb()	__asm __volatile("sfence;" : : : "memory")
#define	rmb()	__asm __volatile("lfence;" : : : "memory")

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
 * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
 * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */
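
/*
 * Illustrative usage (hypothetical names, not part of this header):
 * manipulating shared words from several CPUs.
 *
 *	static volatile u_int flags, cnt;
 *
 *	atomic_set_int(&flags, 0x01);		-- flags |= 0x01, atomically
 *	atomic_clear_int(&flags, 0x01);		-- flags &= ~0x01, atomically
 *	atomic_add_int(&cnt, 1);		-- cnt += 1, atomically
 *
 * Unlike plain |=, &= and +=, these never lose an update when two CPUs
 * hit the same word simultaneously.
 */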

/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int	atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
int	atomic_testandset_int(volatile u_int *p, u_int v);
int	atomic_testandset_long(volatile u_long *p, u_int v);

#define	ATOMIC_LOAD(TYPE, LOP)					\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define	ATOMIC_STORE(TYPE)					\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif
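
/*
 * MPLOCKED is glued onto each instruction template by C string
 * concatenation.  As an illustrative example, with MPLOCKED defined,
 *
 *	__asm __volatile(MPLOCKED "xaddl %0,%1" ...)
 *
 * assembles as "lock ; xaddl %0,%1"; a UP kernel build emits the bare
 * "xaddl %0,%1" and skips the bus-lock overhead.
 */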

/*
 * The assembly is marked volatile so the compiler cannot discard it as
 * dead code.  GCC aggressively reorders operations, so the barrier
 * variants also need a memory clobber to prevent such reordering
 * across them.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "memory", "cc");				\
}							\
struct __hack
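
/*
 * As a sketch of what one instantiation produces, ATOMIC_ASM(add, int,
 * "addl %1,%0", "ir", v) expands on an SMP build to roughly:
 *
 *	static __inline void
 *	atomic_add_int(volatile u_int *p, u_int v)
 *	{
 *		__asm __volatile("lock ; addl %1,%0"
 *		: "+m" (*p) : "ir" (v) : "cc");
 *	}
 *
 * plus an atomic_add_barr_int() twin whose extra "memory" clobber also
 * keeps the compiler from moving other memory accesses across the call.
 */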

/*
 * Atomic compare and set, used by the mutex functions.
 *
 * if (*dst == expect) *dst = src (32 and 64 bit versions)
 *
 * Returns 0 on failure, non-zero on success.
 */

static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchgl %3,%1 ;	"
	"	sete	%0 ;		"
	"# atomic_cmpset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+a" (expect)			/* 2 */
	: "r" (src)			/* 3 */
	: "memory", "cc");
	return (res);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchgq %3,%1 ;	"
	"	sete	%0 ;		"
	"# atomic_cmpset_long"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+a" (expect)			/* 2 */
	: "r" (src)			/* 3 */
	: "memory", "cc");
	return (res);
}
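
/*
 * A minimal spin-lock sketch built on the primitive above (illustrative
 * only; real kernel code should use the mutex(9) interfaces instead):
 *
 *	static volatile u_int lk;
 *
 *	while (atomic_cmpset_int(&lk, 0, 1) == 0)
 *		;				-- spin until 0 -> 1 wins
 *	... critical section ...
 *	atomic_store_rel_int(&lk, 0);		-- release the lock
 */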

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	" MPLOCKED "		"
	"	xaddl	%0,%1 ;		"
	"# atomic_fetchadd_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "cc");
	return (v);
}

/*
 * Atomically add the value of v to the long integer pointed to by p and
 * return the previous value of *p.
 */
static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	__asm __volatile(
	"	" MPLOCKED "		"
	"	xaddq	%0,%1 ;		"
	"# atomic_fetchadd_long"
	: "+r" (v),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "cc");
	return (v);
}
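
/*
 * Because fetchadd returns the value *before* the addition, it is a
 * natural building block for ticket counters (illustrative sketch,
 * hypothetical names):
 *
 *	static volatile u_int next_ticket;
 *
 *	u_int my_ticket = atomic_fetchadd_int(&next_ticket, 1);
 *
 * Concurrent callers each receive a distinct, increasing ticket, since
 * xadd performs the read and the add as one locked operation.
 */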

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Jr" ((u_long)(v & 0x3f))	/* 2 */
	: "cc");
	return (res);
}
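
/*
 * testandset atomically sets one bit and reports its previous value, so
 * a "first caller wins" guard can be written as (illustrative sketch):
 *
 *	static volatile u_int once;
 *
 *	if (atomic_testandset_int(&once, 0) == 0) {
 *		... runs exactly once ...
 *	}
 *
 * Note that the bit index is reduced modulo the word width (v & 0x1f
 * and v & 0x3f above), so the operation always stays within the
 * addressed word.
 */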

/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 (and AMD64) memory model, a simple store guarantees release
 * semantics.
 *
 * However, loads may pass stores, so for atomic_load_acq we have to
 * ensure a Store/Load barrier to do the load in SMP kernels.  We use
 * "lock cmpxchg" as recommended by the AMD Software Optimization
 * Guide, and not mfence.  For UP kernels, however, the cache of the
 * single processor is always consistent, so we only need to take care
 * of the compiler.
 */
#define	ATOMIC_STORE(TYPE)				\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__compiler_membar();				\
	*p = v;						\
}							\
struct __hack

#if defined(_KERNEL) && !defined(SMP)

#define	ATOMIC_LOAD(TYPE, LOP)				\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE tmp;					\
							\
	tmp = *p;					\
	__compiler_membar();				\
	return (tmp);					\
}							\
struct __hack

#else /* !(_KERNEL && !SMP) */

#define	ATOMIC_LOAD(TYPE, LOP)				\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res;					\
							\
	__asm __volatile(MPLOCKED LOP			\
	: "=a" (res),			/* 0 */		\
	  "+m" (*p)			/* 1 */		\
	: : "memory", "cc");				\
	return (res);					\
}							\
struct __hack

#endif /* _KERNEL && !SMP */
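
/*
 * The acquire/release pair above supports the classic message-passing
 * idiom (illustrative sketch; "data" and "ready" are hypothetical):
 *
 *	producer:
 *		data = 42;
 *		atomic_store_rel_int(&ready, 1);
 *
 *	consumer:
 *		while (atomic_load_acq_int(&ready) == 0)
 *			continue;
 *		... data is guaranteed to read as 42 here ...
 *
 * The release store orders the write of data before the write of ready;
 * the acquire load orders the read of ready before the read of data.
 */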

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,	     long,  "orq %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);

ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
ATOMIC_LOAD(long,  "cmpxchgq %0,%1");

ATOMIC_STORE(char);
ATOMIC_STORE(short);
ATOMIC_STORE(int);
ATOMIC_STORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE

#ifndef WANT_FUNCTIONS

/* Read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM

static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_swap_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	__asm __volatile(
	"	xchgq	%1,%0 ;		"
	"# atomic_swap_long"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}

#else /* !__GNUCLIKE_ASM */

u_int	atomic_swap_int(volatile u_int *p, u_int v);
u_long	atomic_swap_long(volatile u_long *p, u_long v);

#endif /* __GNUCLIKE_ASM */

#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char

#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short

#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long

#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
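
/*
 * readandclear is convenient for draining a word of pending-event bits
 * in one shot (illustrative sketch; "pending" is hypothetical):
 *
 *	u_int ev = atomic_readandclear_int(&pending);
 *
 * Since it is implemented with xchg, each set bit is observed by
 * exactly one drain; a concurrent atomic_set_int() lands either in
 * this drain or in the next one.
 */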

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int

/* Operations on 64-bit quad words. */
#define	atomic_set_64		atomic_set_long
#define	atomic_set_acq_64	atomic_set_acq_long
#define	atomic_set_rel_64	atomic_set_rel_long
#define	atomic_clear_64		atomic_clear_long
#define	atomic_clear_acq_64	atomic_clear_acq_long
#define	atomic_clear_rel_64	atomic_clear_rel_long
#define	atomic_add_64		atomic_add_long
#define	atomic_add_acq_64	atomic_add_acq_long
#define	atomic_add_rel_64	atomic_add_rel_long
#define	atomic_subtract_64	atomic_subtract_long
#define	atomic_subtract_acq_64	atomic_subtract_acq_long
#define	atomic_subtract_rel_64	atomic_subtract_rel_long
#define	atomic_load_acq_64	atomic_load_acq_long
#define	atomic_store_rel_64	atomic_store_rel_long
#define	atomic_cmpset_64	atomic_cmpset_long
#define	atomic_cmpset_acq_64	atomic_cmpset_acq_long
#define	atomic_cmpset_rel_64	atomic_cmpset_rel_long
#define	atomic_swap_64		atomic_swap_long
#define	atomic_readandclear_64	atomic_readandclear_long
#define	atomic_testandset_64	atomic_testandset_long

/* Operations on pointers. */
#define	atomic_set_ptr		atomic_set_long
#define	atomic_set_acq_ptr	atomic_set_acq_long
#define	atomic_set_rel_ptr	atomic_set_rel_long
#define	atomic_clear_ptr	atomic_clear_long
#define	atomic_clear_acq_ptr	atomic_clear_acq_long
#define	atomic_clear_rel_ptr	atomic_clear_rel_long
#define	atomic_add_ptr		atomic_add_long
#define	atomic_add_acq_ptr	atomic_add_acq_long
#define	atomic_add_rel_ptr	atomic_add_rel_long
#define	atomic_subtract_ptr	atomic_subtract_long
#define	atomic_subtract_acq_ptr	atomic_subtract_acq_long
#define	atomic_subtract_rel_ptr	atomic_subtract_rel_long
#define	atomic_load_acq_ptr	atomic_load_acq_long
#define	atomic_store_rel_ptr	atomic_store_rel_long
#define	atomic_cmpset_ptr	atomic_cmpset_long
#define	atomic_cmpset_acq_ptr	atomic_cmpset_acq_long
#define	atomic_cmpset_rel_ptr	atomic_cmpset_rel_long
#define	atomic_swap_ptr		atomic_swap_long
#define	atomic_readandclear_ptr	atomic_readandclear_long

#endif /* !WANT_FUNCTIONS */

#endif /* !_MACHINE_ATOMIC_H_ */