atomic.h revision 254620
138517Sdfr/*-
238517Sdfr * Copyright (c) 1998 Doug Rabson
338517Sdfr * All rights reserved.
438517Sdfr *
538517Sdfr * Redistribution and use in source and binary forms, with or without
638517Sdfr * modification, are permitted provided that the following conditions
738517Sdfr * are met:
838517Sdfr * 1. Redistributions of source code must retain the above copyright
938517Sdfr *    notice, this list of conditions and the following disclaimer.
1038517Sdfr * 2. Redistributions in binary form must reproduce the above copyright
1138517Sdfr *    notice, this list of conditions and the following disclaimer in the
1238517Sdfr *    documentation and/or other materials provided with the distribution.
1338517Sdfr *
1438517Sdfr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1538517Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1638517Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1738517Sdfr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1838517Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1938517Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2038517Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2138517Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2238517Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2338517Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2438517Sdfr * SUCH DAMAGE.
2538517Sdfr *
2650477Speter * $FreeBSD: head/sys/i386/include/atomic.h 254620 2013-08-21 22:30:11Z jkim $
2738517Sdfr */
2838517Sdfr#ifndef _MACHINE_ATOMIC_H_
29147855Sjhb#define	_MACHINE_ATOMIC_H_
3038517Sdfr
31143063Sjoerg#ifndef _SYS_CDEFS_H_
32143063Sjoerg#error this file needs sys/cdefs.h as a prerequisite
33143063Sjoerg#endif
34143063Sjoerg
35254619Sjkim#ifdef _KERNEL
36254619Sjkim#include <machine/md_var.h>
37254619Sjkim#include <machine/specialreg.h>
38254619Sjkim#endif
39254619Sjkim
/*
 * Memory barriers.  All three flavours expand to the same statement: a
 * locked add of zero to the word at the top of the stack.  The "memory"
 * clobber also keeps the compiler from moving memory accesses across it.
 */
#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
#define	wmb()	mb()
#define	rmb()	mb()
43185162Skmacy
4438517Sdfr/*
45165635Sbde * Various simple operations on memory, each of which is atomic in the
46165635Sbde * presence of interrupts and multiple processors.
4738517Sdfr *
48165633Sbde * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
49165633Sbde * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
50165633Sbde * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
51165633Sbde * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
5248797Salc *
53165633Sbde * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
54165633Sbde * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
55165633Sbde * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
56165633Sbde * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
5748797Salc *
58165633Sbde * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
59165633Sbde * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
60165633Sbde * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
61165633Sbde * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
62254617Sjkim * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
63165635Sbde * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
6448797Salc *
65165633Sbde * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
66165633Sbde * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
67165633Sbde * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
68165633Sbde * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
69254617Sjkim * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
70165635Sbde * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
7138517Sdfr */
7238517Sdfr
7348797Salc/*
7449999Salc * The above functions are expanded inline in the statically-linked
7549999Salc * kernel.  Lock prefixes are generated if an SMP kernel is being
7649999Salc * built.
7749999Salc *
7849999Salc * Kernel modules call real functions which are built into the kernel.
7949999Salc * This allows kernel modules to be portable between UP and SMP systems.
8048797Salc */
81147855Sjhb#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
82147855Sjhb#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
83197803Sattiliovoid atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
84197803Sattiliovoid atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
8549999Salc
86208332Sphkint	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
87165633Sbdeu_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
88254617Sjkimint	atomic_testandset_int(volatile u_int *p, u_int v);
8965514Sphk
90236456Skib#define	ATOMIC_LOAD(TYPE, LOP)					\
91236456Skibu_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
92236456Skib#define	ATOMIC_STORE(TYPE)					\
93100251Smarkmvoid		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
9471085Sjhb
95254620Sjkimint		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
96254619Sjkimuint64_t	atomic_load_acq_64(volatile uint64_t *);
97254619Sjkimvoid		atomic_store_rel_64(volatile uint64_t *, uint64_t);
98254620Sjkimuint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
99254619Sjkim
100147855Sjhb#else /* !KLD_MODULE && __GNUCLIKE_ASM */
10172358Smarkm
/*
 * MPLOCKED is the prefix pasted in front of the atomic read-modify-write
 * instructions below.  Only a uniprocessor kernel build leaves it empty;
 * SMP kernels need the lock prefix, and userland always uses it so that
 * the same binaries run on both SMP and !SMP systems.
 */
#if defined(_KERNEL) && !defined(SMP)
#define	MPLOCKED
#else
#define	MPLOCKED	"lock ; "
#endif
11138517Sdfr
/*
 * Generator for the simple read-modify-write operations.  For each
 * NAME/TYPE pair it defines two inline functions that apply instruction
 * OP (prefixed with MPLOCKED) to *p, passing V through constraint CONS:
 *
 *   atomic_NAME_TYPE()		clobbers only the condition codes;
 *   atomic_NAME_barr_TYPE()	additionally clobbers "memory", so the
 *				compiler cannot reorder memory accesses
 *				around the operation.
 *
 * The asm statements are volatile so the compiler cannot discard them.
 * The trailing "struct __hack" lets each expansion end in a semicolon.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)				\
static __inline void							\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile(MPLOCKED OP					\
	    : "+m" (*p)							\
	    : CONS (V)							\
	    : "cc");							\
}									\
									\
static __inline void							\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile(MPLOCKED OP					\
	    : "+m" (*p)							\
	    : CONS (V)							\
	    : "memory", "cc");						\
}									\
struct __hack
136100327Smarkm
13765514Sphk/*
13865514Sphk * Atomic compare and set, used by the mutex functions
13965514Sphk *
140208332Sphk * if (*dst == expect) *dst = src (all 32 bit words)
14165514Sphk *
14265514Sphk * Returns 0 on failure, non-zero on success
14365514Sphk */
14465514Sphk
145165635Sbde#ifdef CPU_DISABLE_CMPXCHG
146100327Smarkm
147197910Sattiliostatic __inline int
148208332Sphkatomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
149197910Sattilio{
150197910Sattilio	u_char res;
15165514Sphk
152197910Sattilio	__asm __volatile(
153197910Sattilio	"	pushfl ;		"
154197910Sattilio	"	cli ;			"
155254612Sjkim	"	cmpl	%3,%1 ;		"
156197910Sattilio	"	jne	1f ;		"
157197910Sattilio	"	movl	%2,%1 ;		"
158197910Sattilio	"1:				"
159197910Sattilio	"       sete	%0 ;		"
160197910Sattilio	"	popfl ;			"
161197910Sattilio	"# atomic_cmpset_int"
162197910Sattilio	: "=q" (res),			/* 0 */
163254612Sjkim	  "+m" (*dst)			/* 1 */
164197910Sattilio	: "r" (src),			/* 2 */
165254612Sjkim	  "r" (expect)			/* 3 */
166197910Sattilio	: "memory");
167197910Sattilio	return (res);
168197910Sattilio}
169197910Sattilio
170165635Sbde#else /* !CPU_DISABLE_CMPXCHG */
171100327Smarkm
172197910Sattiliostatic __inline int
173208332Sphkatomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
174197910Sattilio{
175197910Sattilio	u_char res;
17665514Sphk
177197910Sattilio	__asm __volatile(
178197910Sattilio	"	" MPLOCKED "		"
179254614Sjkim	"	cmpxchgl %3,%1 ;	"
180197910Sattilio	"       sete	%0 ;		"
181197910Sattilio	"# atomic_cmpset_int"
182254614Sjkim	: "=q" (res),			/* 0 */
183254614Sjkim	  "+m" (*dst),			/* 1 */
184254614Sjkim	  "+a" (expect)			/* 2 */
185254614Sjkim	: "r" (src)			/* 3 */
186216524Skib	: "memory", "cc");
187197910Sattilio	return (res);
188197910Sattilio}
189197910Sattilio
190165635Sbde#endif /* CPU_DISABLE_CMPXCHG */
191100327Smarkm
192150627Sjhb/*
193150627Sjhb * Atomically add the value of v to the integer pointed to by p and return
194150627Sjhb * the previous value of *p.
195150627Sjhb */
196150627Sjhbstatic __inline u_int
197150627Sjhbatomic_fetchadd_int(volatile u_int *p, u_int v)
198150627Sjhb{
199150627Sjhb
200165633Sbde	__asm __volatile(
201165630Sbde	"	" MPLOCKED "		"
202254610Sjkim	"	xaddl	%0,%1 ;		"
203150627Sjhb	"# atomic_fetchadd_int"
204254610Sjkim	: "+r" (v),			/* 0 */
205254612Sjkim	  "+m" (*p)			/* 1 */
206254612Sjkim	: : "cc");
207150627Sjhb	return (v);
208150627Sjhb}
209150627Sjhb
210254617Sjkimstatic __inline int
211254617Sjkimatomic_testandset_int(volatile u_int *p, u_int v)
212254617Sjkim{
213254617Sjkim	u_char res;
214254617Sjkim
215254617Sjkim	__asm __volatile(
216254617Sjkim	"	" MPLOCKED "		"
217254617Sjkim	"	btsl	%2,%1 ;		"
218254617Sjkim	"	setc	%0 ;		"
219254617Sjkim	"# atomic_testandset_int"
220254617Sjkim	: "=q" (res),			/* 0 */
221254617Sjkim	  "+m" (*p)			/* 1 */
222254617Sjkim	: "Ir" (v & 0x1f)		/* 2 */
223254617Sjkim	: "cc");
224254617Sjkim	return (res);
225254617Sjkim}
226254617Sjkim
227236456Skib/*
228236456Skib * We assume that a = b will do atomic loads and stores.  Due to the
229236456Skib * IA32 memory model, a simple store guarantees release semantics.
230236456Skib *
231236456Skib * However, loads may pass stores, so for atomic_load_acq we have to
232236456Skib * ensure a Store/Load barrier to do the load in SMP kernels.  We use
233236456Skib * "lock cmpxchg" as recommended by the AMD Software Optimization
234236456Skib * Guide, and not mfence.  For UP kernels, however, the cache of the
235236456Skib * single processor is always consistent, so we only need to take care
236236456Skib * of the compiler.
237236456Skib */
/*
 * Generator for atomic_store_rel_TYPE(): a compiler-level barrier
 * (__compiler_membar()) followed by a plain store to *p.
 */
#define	ATOMIC_STORE(TYPE)						\
static __inline void							\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
									\
	__compiler_membar();						\
	*p = v;								\
}									\
struct __hack
246236456Skib
247137623Sjhb#if defined(_KERNEL) && !defined(SMP)
248100327Smarkm
/*
 * Uniprocessor kernel: atomic_load_acq_TYPE() is a plain load followed by
 * a compiler-level barrier.  LOP is not used in this configuration.
 */
#define	ATOMIC_LOAD(TYPE, LOP)						\
static __inline u_##TYPE						\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)				\
{									\
	u_##TYPE val = *p;						\
									\
	__compiler_membar();						\
	return (val);							\
}									\
struct __hack
260100327Smarkm
261165635Sbde#else /* !(_KERNEL && !SMP) */
26267351Sjhb
/*
 * SMP kernels and userland: atomic_load_acq_TYPE() performs the load with
 * the MPLOCKED-prefixed instruction LOP.  The value is returned through
 * the accumulator ("=a"); *p is listed as read-write because LOP names it
 * as its destination operand.
 */
#define	ATOMIC_LOAD(TYPE, LOP)						\
static __inline u_##TYPE						\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)				\
{									\
	u_##TYPE val;							\
									\
	__asm __volatile(MPLOCKED LOP					\
	    : "=a" (val),		/* 0 */				\
	      "+m" (*p)			/* 1 */				\
	    : : "memory", "cc");					\
	return (val);							\
}									\
struct __hack
276100327Smarkm
277165635Sbde#endif /* _KERNEL && !SMP */
278100327Smarkm
279254619Sjkim#ifdef _KERNEL
280254619Sjkim
#ifdef WANT_FUNCTIONS
/* Variants that disable interrupts around the access. */
int		atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect,
		    uint64_t src);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *p);
void		atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v);
uint64_t	atomic_swap_64_i386(volatile uint64_t *p, uint64_t v);

/* Variants built on cmpxchg8b. */
int		atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect,
		    uint64_t src);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *p);
void		atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v);
uint64_t	atomic_swap_64_i586(volatile uint64_t *p, uint64_t v);
#endif
291254619Sjkim
292254619Sjkim/* I486 does not support SMP or CMPXCHG8B. */
293254620Sjkimstatic __inline int
294254620Sjkimatomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
295254620Sjkim{
296254620Sjkim	volatile uint32_t *p;
297254620Sjkim	u_char res;
298254620Sjkim
299254620Sjkim	p = (volatile uint32_t *)dst;
300254620Sjkim	__asm __volatile(
301254620Sjkim	"	pushfl ;		"
302254620Sjkim	"	cli ;			"
303254620Sjkim	"	xorl	%1,%%eax ;	"
304254620Sjkim	"	xorl	%2,%%edx ;	"
305254620Sjkim	"	orl	%%edx,%%eax ;	"
306254620Sjkim	"	jne	1f ;		"
307254620Sjkim	"	movl	%4,%1 ;		"
308254620Sjkim	"	movl	%5,%2 ;		"
309254620Sjkim	"1:				"
310254620Sjkim	"	sete	%3 ;		"
311254620Sjkim	"	popfl"
312254620Sjkim	: "+A" (expect),		/* 0 */
313254620Sjkim	  "+m" (*p),			/* 1 */
314254620Sjkim	  "+m" (*(p + 1)),		/* 2 */
315254620Sjkim	  "=q" (res)			/* 3 */
316254620Sjkim	: "r" ((uint32_t)src),		/* 4 */
317254620Sjkim	  "r" ((uint32_t)(src >> 32))	/* 5 */
318254620Sjkim	: "memory", "cc");
319254620Sjkim	return (res);
320254620Sjkim}
321254620Sjkim
/*
 * 64-bit acquire load without CMPXCHG8B: both 32-bit halves of *p are
 * read with interrupts disabled and returned in %edx:%eax.
 */
static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *half;
	uint64_t val;

	half = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (val)			/* 0 */
	: "m" (half[0]),		/* 1 */
	  "m" (half[1])			/* 2 */
	: "memory");
	return (val);
}
341254619Sjkim
/*
 * 64-bit release store without CMPXCHG8B: v is passed in %edx:%eax and
 * both 32-bit halves of *p are written with interrupts disabled.
 */
static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *half;

	half = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (half[0]),		/* 0 */
	  "=m" (half[1])		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}
359254619Sjkim
/*
 * 64-bit swap without CMPXCHG8B: with interrupts disabled, the current
 * halves of *p are loaded into %edx:%eax and the halves of v are then
 * stored in their place.
 *
 * Returns the value *p held before the store.
 */
static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *half;
	uint32_t v_lo, v_hi;
	uint64_t prev;

	half = (volatile uint32_t *)p;
	v_lo = (uint32_t)v;
	v_hi = (uint32_t)(v >> 32);
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (prev),			/* 0 */
	  "+m" (half[0]),		/* 1 */
	  "+m" (half[1])		/* 2 */
	: "r" (v_lo),			/* 3 */
	  "r" (v_hi));			/* 4 */
	return (prev);
}
382254620Sjkim
383254620Sjkimstatic __inline int
384254620Sjkimatomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
385254620Sjkim{
386254620Sjkim	u_char res;
387254620Sjkim
388254620Sjkim	__asm __volatile(
389254620Sjkim	"	" MPLOCKED "		"
390254620Sjkim	"	cmpxchg8b %1 ;		"
391254620Sjkim	"	sete	%0"
392254620Sjkim	: "=q" (res),			/* 0 */
393254620Sjkim	  "+m" (*dst),			/* 1 */
394254620Sjkim	  "+A" (expect)			/* 2 */
395254620Sjkim	: "b" ((uint32_t)src),		/* 3 */
396254620Sjkim	  "c" ((uint32_t)(src >> 32))	/* 4 */
397254620Sjkim	: "memory", "cc");
398254620Sjkim	return (res);
399254620Sjkim}
400254620Sjkim
401254620Sjkimstatic __inline uint64_t
402254619Sjkimatomic_load_acq_64_i586(volatile uint64_t *p)
403254619Sjkim{
404254619Sjkim	uint64_t res;
405254619Sjkim
406254619Sjkim	__asm __volatile(
407254619Sjkim	"	movl	%%ebx,%%eax ;	"
408254619Sjkim	"	movl	%%ecx,%%edx ;	"
409254619Sjkim	"	" MPLOCKED "		"
410254619Sjkim	"	cmpxchg8b %1"
411254619Sjkim	: "=&A" (res),			/* 0 */
412254619Sjkim	  "+m" (*p)			/* 1 */
413254619Sjkim	: : "memory", "cc");
414254619Sjkim	return (res);
415254619Sjkim}
416254619Sjkim
417254619Sjkimstatic __inline void
418254619Sjkimatomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
419254619Sjkim{
420254619Sjkim
421254619Sjkim	__asm __volatile(
422254619Sjkim	"	movl	%%eax,%%ebx ;	"
423254619Sjkim	"	movl	%%edx,%%ecx ;	"
424254619Sjkim	"1:				"
425254619Sjkim	"	" MPLOCKED "		"
426254619Sjkim	"	cmpxchg8b %0 ;		"
427254619Sjkim	"	jne	1b"
428254619Sjkim	: "+m" (*p),			/* 0 */
429254619Sjkim	  "+A" (v)			/* 1 */
430254619Sjkim	: : "ebx", "ecx", "memory", "cc");
431254619Sjkim}
432254619Sjkim
433254619Sjkimstatic __inline uint64_t
434254620Sjkimatomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
435254620Sjkim{
436254620Sjkim
437254620Sjkim	__asm __volatile(
438254620Sjkim	"	movl	%%eax,%%ebx ;	"
439254620Sjkim	"	movl	%%edx,%%ecx ;	"
440254620Sjkim	"1:				"
441254620Sjkim	"	" MPLOCKED "		"
442254620Sjkim	"	cmpxchg8b %0 ;		"
443254620Sjkim	"	jne	1b"
444254620Sjkim	: "+m" (*p),			/* 0 */
445254620Sjkim	  "+A" (v)			/* 1 */
446254620Sjkim	: : "ebx", "ecx", "memory", "cc");
447254620Sjkim	return (v);
448254620Sjkim}
449254620Sjkim
450254620Sjkimstatic __inline int
451254620Sjkimatomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
452254620Sjkim{
453254620Sjkim
454254620Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
455254620Sjkim		return (atomic_cmpset_64_i386(dst, expect, src));
456254620Sjkim	else
457254620Sjkim		return (atomic_cmpset_64_i586(dst, expect, src));
458254620Sjkim}
459254620Sjkim
460254620Sjkimstatic __inline uint64_t
461254619Sjkimatomic_load_acq_64(volatile uint64_t *p)
462254619Sjkim{
463254619Sjkim
464254619Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
465254619Sjkim		return (atomic_load_acq_64_i386(p));
466254619Sjkim	else
467254619Sjkim		return (atomic_load_acq_64_i586(p));
468254619Sjkim}
469254619Sjkim
470254619Sjkimstatic __inline void
471254619Sjkimatomic_store_rel_64(volatile uint64_t *p, uint64_t v)
472254619Sjkim{
473254619Sjkim
474254619Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
475254619Sjkim		atomic_store_rel_64_i386(p, v);
476254619Sjkim	else
477254619Sjkim		atomic_store_rel_64_i586(p, v);
478254619Sjkim}
479254619Sjkim
480254620Sjkimstatic __inline uint64_t
481254620Sjkimatomic_swap_64(volatile uint64_t *p, uint64_t v)
482254620Sjkim{
483254620Sjkim
484254620Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
485254620Sjkim		return (atomic_swap_64_i386(p, v));
486254620Sjkim	else
487254620Sjkim		return (atomic_swap_64_i586(p, v));
488254620Sjkim}
489254620Sjkim
490254619Sjkim#endif /* _KERNEL */
491254619Sjkim
492147855Sjhb#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
493100251Smarkm
494100251SmarkmATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
495100251SmarkmATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
496100251SmarkmATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
497100251SmarkmATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
49871085Sjhb
499100251SmarkmATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
500100251SmarkmATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
501100251SmarkmATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
502100251SmarkmATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
50371085Sjhb
504100251SmarkmATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
505100251SmarkmATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
506100251SmarkmATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
507100251SmarkmATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
50871085Sjhb
509100251SmarkmATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
510100251SmarkmATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
511100251SmarkmATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
512100251SmarkmATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
51371085Sjhb
514236456SkibATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
515236456SkibATOMIC_LOAD(short, "cmpxchgw %w0,%1");
516236456SkibATOMIC_LOAD(int,   "cmpxchgl %0,%1");
517236456SkibATOMIC_LOAD(long,  "cmpxchgl %0,%1");
51871023Sjhb
519236456SkibATOMIC_STORE(char);
520236456SkibATOMIC_STORE(short);
521236456SkibATOMIC_STORE(int);
522236456SkibATOMIC_STORE(long);
523236456Skib
52471085Sjhb#undef ATOMIC_ASM
525236456Skib#undef ATOMIC_LOAD
526236456Skib#undef ATOMIC_STORE
52767351Sjhb
528165635Sbde#ifndef WANT_FUNCTIONS
529147855Sjhb
530147855Sjhbstatic __inline int
531208332Sphkatomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
532147855Sjhb{
533147855Sjhb
534208332Sphk	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
535147855Sjhb	    (u_int)src));
536147855Sjhb}
537147855Sjhb
538177276Spjdstatic __inline u_long
539177276Spjdatomic_fetchadd_long(volatile u_long *p, u_long v)
540177276Spjd{
541177276Spjd
542177276Spjd	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
543177276Spjd}
544177276Spjd
545254617Sjkimstatic __inline int
546254617Sjkimatomic_testandset_long(volatile u_long *p, u_int v)
547254617Sjkim{
548254617Sjkim
549254617Sjkim	return (atomic_testandset_int((volatile u_int *)p, v));
550254617Sjkim}
551254617Sjkim
552254617Sjkim/* Read the current value and store a new value in the destination. */
553147855Sjhb#ifdef __GNUCLIKE_ASM
554147855Sjhb
555147855Sjhbstatic __inline u_int
556254617Sjkimatomic_swap_int(volatile u_int *p, u_int v)
557147855Sjhb{
558147855Sjhb
559165633Sbde	__asm __volatile(
560147855Sjhb	"	xchgl	%1,%0 ;		"
561254617Sjkim	"# atomic_swap_int"
562254617Sjkim	: "+r" (v),			/* 0 */
563254612Sjkim	  "+m" (*p));			/* 1 */
564254617Sjkim	return (v);
565147855Sjhb}
566147855Sjhb
567147855Sjhbstatic __inline u_long
568254617Sjkimatomic_swap_long(volatile u_long *p, u_long v)
569147855Sjhb{
570147855Sjhb
571254617Sjkim	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
572147855Sjhb}
573147855Sjhb
574147855Sjhb#else /* !__GNUCLIKE_ASM */
575147855Sjhb
/*
 * Without GCC-style inline assembly the swap routines are only declared
 * here; presumably they are provided out of line elsewhere.
 */
576254617Sjkimu_int	atomic_swap_int(volatile u_int *p, u_int v);
577254617Sjkimu_long	atomic_swap_long(volatile u_long *p, u_long v);
578147855Sjhb
579147855Sjhb#endif /* __GNUCLIKE_ASM */
580147855Sjhb
/*
 * Acquire (_acq_) and release (_rel_) spellings.  For set, clear, add and
 * subtract, both map to the _barr_ function produced by ATOMIC_ASM.  For
 * cmpset, both map to the plain cmpset function.  atomic_readandclear_*()
 * is a swap with zero.
 */
581197803Sattilio#define	atomic_set_acq_char		atomic_set_barr_char
582197803Sattilio#define	atomic_set_rel_char		atomic_set_barr_char
583197803Sattilio#define	atomic_clear_acq_char		atomic_clear_barr_char
584197803Sattilio#define	atomic_clear_rel_char		atomic_clear_barr_char
585197803Sattilio#define	atomic_add_acq_char		atomic_add_barr_char
586197803Sattilio#define	atomic_add_rel_char		atomic_add_barr_char
587197803Sattilio#define	atomic_subtract_acq_char	atomic_subtract_barr_char
588197803Sattilio#define	atomic_subtract_rel_char	atomic_subtract_barr_char
58971085Sjhb
590197803Sattilio#define	atomic_set_acq_short		atomic_set_barr_short
591197803Sattilio#define	atomic_set_rel_short		atomic_set_barr_short
592197803Sattilio#define	atomic_clear_acq_short		atomic_clear_barr_short
593197803Sattilio#define	atomic_clear_rel_short		atomic_clear_barr_short
594197803Sattilio#define	atomic_add_acq_short		atomic_add_barr_short
595197803Sattilio#define	atomic_add_rel_short		atomic_add_barr_short
596197803Sattilio#define	atomic_subtract_acq_short	atomic_subtract_barr_short
597197803Sattilio#define	atomic_subtract_rel_short	atomic_subtract_barr_short
59871085Sjhb
599197803Sattilio#define	atomic_set_acq_int		atomic_set_barr_int
600197803Sattilio#define	atomic_set_rel_int		atomic_set_barr_int
601197803Sattilio#define	atomic_clear_acq_int		atomic_clear_barr_int
602197803Sattilio#define	atomic_clear_rel_int		atomic_clear_barr_int
603197803Sattilio#define	atomic_add_acq_int		atomic_add_barr_int
604197803Sattilio#define	atomic_add_rel_int		atomic_add_barr_int
605197803Sattilio#define	atomic_subtract_acq_int		atomic_subtract_barr_int
606197803Sattilio#define	atomic_subtract_rel_int		atomic_subtract_barr_int
607197910Sattilio#define	atomic_cmpset_acq_int		atomic_cmpset_int
608197910Sattilio#define	atomic_cmpset_rel_int		atomic_cmpset_int
60971085Sjhb
610197803Sattilio#define	atomic_set_acq_long		atomic_set_barr_long
611197803Sattilio#define	atomic_set_rel_long		atomic_set_barr_long
612197803Sattilio#define	atomic_clear_acq_long		atomic_clear_barr_long
613197803Sattilio#define	atomic_clear_rel_long		atomic_clear_barr_long
614197803Sattilio#define	atomic_add_acq_long		atomic_add_barr_long
615197803Sattilio#define	atomic_add_rel_long		atomic_add_barr_long
616197803Sattilio#define	atomic_subtract_acq_long	atomic_subtract_barr_long
617197803Sattilio#define	atomic_subtract_rel_long	atomic_subtract_barr_long
618197910Sattilio#define	atomic_cmpset_acq_long		atomic_cmpset_long
619197910Sattilio#define	atomic_cmpset_rel_long		atomic_cmpset_long
62071085Sjhb
621254617Sjkim#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
622254617Sjkim#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
623254617Sjkim
/*
 * Fixed-width names.  The _8, _16 and _32 spellings are plain aliases for
 * the char, short and int operations respectively.
 */
624147855Sjhb/* Operations on 8-bit bytes. */
62571085Sjhb#define	atomic_set_8		atomic_set_char
62671085Sjhb#define	atomic_set_acq_8	atomic_set_acq_char
62771085Sjhb#define	atomic_set_rel_8	atomic_set_rel_char
62871085Sjhb#define	atomic_clear_8		atomic_clear_char
62971085Sjhb#define	atomic_clear_acq_8	atomic_clear_acq_char
63071085Sjhb#define	atomic_clear_rel_8	atomic_clear_rel_char
63171085Sjhb#define	atomic_add_8		atomic_add_char
63271085Sjhb#define	atomic_add_acq_8	atomic_add_acq_char
63371085Sjhb#define	atomic_add_rel_8	atomic_add_rel_char
63471085Sjhb#define	atomic_subtract_8	atomic_subtract_char
63571085Sjhb#define	atomic_subtract_acq_8	atomic_subtract_acq_char
63671085Sjhb#define	atomic_subtract_rel_8	atomic_subtract_rel_char
63771085Sjhb#define	atomic_load_acq_8	atomic_load_acq_char
63871085Sjhb#define	atomic_store_rel_8	atomic_store_rel_char
63971085Sjhb
640147855Sjhb/* Operations on 16-bit words. */
64171085Sjhb#define	atomic_set_16		atomic_set_short
64271085Sjhb#define	atomic_set_acq_16	atomic_set_acq_short
64371085Sjhb#define	atomic_set_rel_16	atomic_set_rel_short
64471085Sjhb#define	atomic_clear_16		atomic_clear_short
64571085Sjhb#define	atomic_clear_acq_16	atomic_clear_acq_short
64671085Sjhb#define	atomic_clear_rel_16	atomic_clear_rel_short
64771085Sjhb#define	atomic_add_16		atomic_add_short
64871085Sjhb#define	atomic_add_acq_16	atomic_add_acq_short
64971085Sjhb#define	atomic_add_rel_16	atomic_add_rel_short
65071085Sjhb#define	atomic_subtract_16	atomic_subtract_short
65171085Sjhb#define	atomic_subtract_acq_16	atomic_subtract_acq_short
65271085Sjhb#define	atomic_subtract_rel_16	atomic_subtract_rel_short
65371085Sjhb#define	atomic_load_acq_16	atomic_load_acq_short
65471085Sjhb#define	atomic_store_rel_16	atomic_store_rel_short
65571085Sjhb
656147855Sjhb/* Operations on 32-bit double words. */
65771085Sjhb#define	atomic_set_32		atomic_set_int
65871085Sjhb#define	atomic_set_acq_32	atomic_set_acq_int
65971085Sjhb#define	atomic_set_rel_32	atomic_set_rel_int
66071085Sjhb#define	atomic_clear_32		atomic_clear_int
66171085Sjhb#define	atomic_clear_acq_32	atomic_clear_acq_int
66271085Sjhb#define	atomic_clear_rel_32	atomic_clear_rel_int
66371085Sjhb#define	atomic_add_32		atomic_add_int
66471085Sjhb#define	atomic_add_acq_32	atomic_add_acq_int
66571085Sjhb#define	atomic_add_rel_32	atomic_add_rel_int
66671085Sjhb#define	atomic_subtract_32	atomic_subtract_int
66771085Sjhb#define	atomic_subtract_acq_32	atomic_subtract_acq_int
66871085Sjhb#define	atomic_subtract_rel_32	atomic_subtract_rel_int
66971085Sjhb#define	atomic_load_acq_32	atomic_load_acq_int
67071085Sjhb#define	atomic_store_rel_32	atomic_store_rel_int
67171085Sjhb#define	atomic_cmpset_32	atomic_cmpset_int
67271085Sjhb#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
67371085Sjhb#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
674254617Sjkim#define	atomic_swap_32		atomic_swap_int
67571085Sjhb#define	atomic_readandclear_32	atomic_readandclear_int
676150627Sjhb#define	atomic_fetchadd_32	atomic_fetchadd_int
677254617Sjkim#define	atomic_testandset_32	atomic_testandset_int
67871085Sjhb
679147855Sjhb/* Operations on pointers. */
/*
 * Pointer-sized operations.  Each macro reinterprets the target as a
 * volatile u_int and forwards to the matching _int operation.  Every
 * value argument is cast to u_int, so callers may pass either pointer or
 * integer values.  atomic_store_rel_ptr() used to pass v uncast, unlike
 * all of its siblings; it now casts as well.
 */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
72065514Sphk
721165635Sbde#endif /* !WANT_FUNCTIONS */
722165633Sbde
723165633Sbde#endif /* !_MACHINE_ATOMIC_H_ */
724