/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/amd64/include/atomic.h 165572 2006-12-27 20:26:00Z bde $
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

/*
 * Various simple arithmetic operations on memory, each of which is atomic
 * in the presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char*)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char*)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char*)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char*)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short*)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short*)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short*)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short*)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int*)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int*)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int*)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int*)(P) -= (V))
 * atomic_readandclear_int(P)	(return *(u_int*)P; *(u_int*)P = 0;)
 *
 * atomic_set_long(P, V)	(*(u_long*)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long*)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long*)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long*)(P) -= (V))
 * atomic_readandclear_long(P)	(return *(u_long*)P; *(u_long*)P = 0;)
 */
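
/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): a typical consumer of the set/clear primitives maintains a
 * flags word shared with an interrupt handler.  The names "sc_flags"
 * and "RXPENDING" below are hypothetical.
 *
 *	#define	RXPENDING	0x0001
 *
 *	static u_int sc_flags;
 *
 *	atomic_set_int(&sc_flags, RXPENDING);	// mark work pending
 *	...
 *	atomic_clear_int(&sc_flags, RXPENDING);	// work has been drained
 */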

/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
#if defined(KLD_MODULE) || !(defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE))
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src);
int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src);
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);

#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)			\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

#else /* !KLD_MODULE && __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */

/*
 * For userland, assume the SMP case and use lock prefixes so that
 * the binaries will run on both types of systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define	MPLOCKED	lock ;
#else
#define	MPLOCKED
#endif

/*
 * The assembly is volatilized to demarcate potential before-and-after side
 * effects if an interrupt or SMP collision were to occur.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(__XSTRING(MPLOCKED) OP		\
			 : "=m" (*p)			\
			 : CONS (V), "m" (*p));		\
}							\
struct __hack

/*
 * Atomic compare and set, used by the mutex functions.
 *
 * if (*dst == exp) *dst = src (32-bit words for the _int variant,
 * 64-bit words for the _long variant)
 *
 * Returns 0 on failure, non-zero on success.
 */

static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src)
{
	u_char res;

	__asm __volatile (
	"	" __XSTRING(MPLOCKED) "	"
	"	cmpxchgl %2,%1 ;	"
	"	sete	%0 ;		"
	"1:				"
	"# atomic_cmpset_int"
	: "=a" (res),			/* 0 */
	  "=m" (*dst)			/* 1 */
	: "r" (src),			/* 2 */
	  "a" (exp),			/* 3 */
	  "m" (*dst)			/* 4 */
	: "memory");

	return (res);
}
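
/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): the usual pattern built on atomic_cmpset_int() is a
 * read-modify-write loop that retries until no other CPU changed the
 * word in between.  The function below, which atomically ORs bits into
 * *p, is hypothetical.
 *
 *	static __inline void
 *	example_atomic_or_int(volatile u_int *p, u_int bits)
 *	{
 *		u_int old;
 *
 *		do {
 *			old = *p;
 *		} while (atomic_cmpset_int(p, old, old | bits) == 0);
 *	}
 */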

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src)
{
	u_char res;

	__asm __volatile (
	"	" __XSTRING(MPLOCKED) "	"
	"	cmpxchgq %2,%1 ;	"
	"	sete	%0 ;		"
	"1:				"
	"# atomic_cmpset_long"
	: "=a" (res),			/* 0 */
	  "=m" (*dst)			/* 1 */
	: "r" (src),			/* 2 */
	  "a" (exp),			/* 3 */
	  "m" (*dst)			/* 4 */
	: "memory");

	return (res);
}

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

	__asm __volatile (
	"	" __XSTRING(MPLOCKED) "	"
	"	xaddl	%0, %1 ;	"
	"# atomic_fetchadd_int"
	: "+r" (v),			/* 0 (result) */
	  "=m" (*p)			/* 1 */
	: "m" (*p));			/* 2 */

	return (v);
}
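
/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): because atomic_fetchadd_int() returns the old value, it can
 * hand out unique sequence numbers without a CAS retry loop.  The
 * counter name below is hypothetical.
 *
 *	static u_int example_seq;
 *
 *	static __inline u_int
 *	example_next_seq(void)
 *	{
 *		return (atomic_fetchadd_int(&example_seq, 1));
 *	}
 */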

#if defined(_KERNEL) && !defined(SMP)

/*
 * We assume that a = b will do atomic loads and stores.  However, on a
 * PentiumPro or higher, reads may pass writes, so for that case we have
 * to use a serializing instruction (i.e. with LOCK) to do the load in
 * SMP kernels.  For UP kernels, however, the cache of the single processor
 * is always consistent, so we don't need any memory barriers.
 */
#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	return (*p);					\
}							\
							\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	*p = v;						\
}							\
struct __hack

#else /* !(_KERNEL && !SMP) */

#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res;					\
							\
	__asm __volatile(__XSTRING(MPLOCKED) LOP	\
	: "=a" (res),			/* 0 (result) */\
	  "=m" (*p)			/* 1 */		\
	: "m" (*p)			/* 2 */		\
	: "memory");					\
							\
	return (res);					\
}							\
							\
/*							\
 * The XCHG instruction asserts LOCK automagically.	\
 */							\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(SOP				\
	: "=m" (*p),			/* 0 */		\
	  "+r" (v)			/* 1 */		\
	: "m" (*p));			/* 2 */		\
}							\
struct __hack

#endif /* _KERNEL && !SMP */
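
/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): the acq/rel variants order surrounding memory accesses.  A
 * producer fills a datum and then publishes it with a releasing store;
 * a consumer sees the flag via an acquiring load before reading the
 * datum.  The names below are hypothetical.
 *
 *	static u_int example_ready;
 *	static u_int example_data;
 *
 *	// producer
 *	example_data = 42;
 *	atomic_store_rel_int(&example_ready, 1);
 *
 *	// consumer
 *	while (atomic_load_acq_int(&example_ready) == 0)
 *		;
 *	// example_data is now guaranteed to read 42
 */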

#endif /* KLD_MODULE || !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */

ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,	     long,  "orq %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
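
/*
 * For reference (editor's addition): with MPLOCKED defined as "lock ;",
 * the "add"/"int" invocation above expands to roughly the following --
 * a sketch, not the preprocessor's exact output:
 *
 *	static __inline void
 *	atomic_add_int(volatile u_int *p, u_int v)
 *	{
 *		__asm __volatile("lock ; addl %1,%0"
 *				 : "=m" (*p)
 *				 : "ir" (v), "m" (*p));
 *	}
 */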

ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");

#undef ATOMIC_ASM
#undef ATOMIC_STORE_LOAD

#if !defined(WANT_FUNCTIONS)

/* Read the current value and store a zero in the destination. */
#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE)

static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
	u_int result;

	result = 0;
	__asm __volatile (
	"	xchgl	%1,%0 ;		"
	"# atomic_readandclear_int"
	: "+r" (result),		/* 0 (result) */
	  "=m" (*addr)			/* 1 (addr) */
	: "m" (*addr));

	return (result);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
	u_long result;

	result = 0;
	__asm __volatile (
	"	xchgq	%1,%0 ;		"
	"# atomic_readandclear_long"
	: "+r" (result),		/* 0 (result) */
	  "=m" (*addr)			/* 1 (addr) */
	: "m" (*addr));

	return (result);
}

#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */

u_int	atomic_readandclear_int(volatile u_int *);
u_long	atomic_readandclear_long(volatile u_long *);

#endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */
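
/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): because the exchange returns the old contents while zeroing
 * the word, a handler can drain a pending-events mask in one atomic
 * step.  The names below are hypothetical.
 *
 *	static u_int example_pending;
 *
 *	u_int events;
 *
 *	events = atomic_readandclear_int(&example_pending);
 *	// every bit set in "events" is now owned by this CPU alone
 */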

/* Acquire and release variants are identical to the normal ones. */
#define	atomic_set_acq_char		atomic_set_char
#define	atomic_set_rel_char		atomic_set_char
#define	atomic_clear_acq_char		atomic_clear_char
#define	atomic_clear_rel_char		atomic_clear_char
#define	atomic_add_acq_char		atomic_add_char
#define	atomic_add_rel_char		atomic_add_char
#define	atomic_subtract_acq_char	atomic_subtract_char
#define	atomic_subtract_rel_char	atomic_subtract_char

#define	atomic_set_acq_short		atomic_set_short
#define	atomic_set_rel_short		atomic_set_short
#define	atomic_clear_acq_short		atomic_clear_short
#define	atomic_clear_rel_short		atomic_clear_short
#define	atomic_add_acq_short		atomic_add_short
#define	atomic_add_rel_short		atomic_add_short
#define	atomic_subtract_acq_short	atomic_subtract_short
#define	atomic_subtract_rel_short	atomic_subtract_short

#define	atomic_set_acq_int		atomic_set_int
#define	atomic_set_rel_int		atomic_set_int
#define	atomic_clear_acq_int		atomic_clear_int
#define	atomic_clear_rel_int		atomic_clear_int
#define	atomic_add_acq_int		atomic_add_int
#define	atomic_add_rel_int		atomic_add_int
#define	atomic_subtract_acq_int		atomic_subtract_int
#define	atomic_subtract_rel_int		atomic_subtract_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

#define	atomic_set_acq_long		atomic_set_long
#define	atomic_set_rel_long		atomic_set_long
#define	atomic_clear_acq_long		atomic_clear_long
#define	atomic_clear_rel_long		atomic_clear_long
#define	atomic_add_acq_long		atomic_add_long
#define	atomic_add_rel_long		atomic_add_long
#define	atomic_subtract_acq_long	atomic_subtract_long
#define	atomic_subtract_rel_long	atomic_subtract_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int

/* Operations on 64-bit quad words. */
#define	atomic_set_64		atomic_set_long
#define	atomic_set_acq_64	atomic_set_acq_long
#define	atomic_set_rel_64	atomic_set_rel_long
#define	atomic_clear_64		atomic_clear_long
#define	atomic_clear_acq_64	atomic_clear_acq_long
#define	atomic_clear_rel_64	atomic_clear_rel_long
#define	atomic_add_64		atomic_add_long
#define	atomic_add_acq_64	atomic_add_acq_long
#define	atomic_add_rel_64	atomic_add_rel_long
#define	atomic_subtract_64	atomic_subtract_long
#define	atomic_subtract_acq_64	atomic_subtract_acq_long
#define	atomic_subtract_rel_64	atomic_subtract_rel_long
#define	atomic_load_acq_64	atomic_load_acq_long
#define	atomic_store_rel_64	atomic_store_rel_long
#define	atomic_cmpset_64	atomic_cmpset_long
#define	atomic_cmpset_acq_64	atomic_cmpset_acq_long
#define	atomic_cmpset_rel_64	atomic_cmpset_rel_long
#define	atomic_readandclear_64	atomic_readandclear_long

/* Operations on pointers. */
#define	atomic_set_ptr		atomic_set_long
#define	atomic_set_acq_ptr	atomic_set_acq_long
#define	atomic_set_rel_ptr	atomic_set_rel_long
#define	atomic_clear_ptr	atomic_clear_long
#define	atomic_clear_acq_ptr	atomic_clear_acq_long
#define	atomic_clear_rel_ptr	atomic_clear_rel_long
#define	atomic_add_ptr		atomic_add_long
#define	atomic_add_acq_ptr	atomic_add_acq_long
#define	atomic_add_rel_ptr	atomic_add_rel_long
#define	atomic_subtract_ptr	atomic_subtract_long
#define	atomic_subtract_acq_ptr	atomic_subtract_acq_long
#define	atomic_subtract_rel_ptr	atomic_subtract_rel_long
#define	atomic_load_acq_ptr	atomic_load_acq_long
#define	atomic_store_rel_ptr	atomic_store_rel_long
#define	atomic_cmpset_ptr	atomic_cmpset_long
#define	atomic_cmpset_acq_ptr	atomic_cmpset_acq_long
#define	atomic_cmpset_rel_ptr	atomic_cmpset_rel_long
#define	atomic_readandclear_ptr	atomic_readandclear_long
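
/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): since atomic_cmpset_ptr is atomic_cmpset_long here, pointer
 * values travel as u_long.  A lock-free LIFO push then looks like the
 * following; the structure and names are hypothetical.  (A matching
 * pop would additionally have to deal with the ABA problem.)
 *
 *	struct example_node {
 *		struct example_node *next;
 *	};
 *
 *	static struct example_node *example_head;
 *
 *	static __inline void
 *	example_push(struct example_node *n)
 *	{
 *
 *		do {
 *			n->next = example_head;
 *		} while (atomic_cmpset_ptr(
 *		    (volatile u_long *)&example_head,
 *		    (u_long)n->next, (u_long)n) == 0);
 *	}
 */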

#endif	/* !defined(WANT_FUNCTIONS) */
#endif /* ! _MACHINE_ATOMIC_H_ */