atomic.h revision 284901
138517Sdfr/*-
238517Sdfr * Copyright (c) 1998 Doug Rabson
338517Sdfr * All rights reserved.
438517Sdfr *
538517Sdfr * Redistribution and use in source and binary forms, with or without
638517Sdfr * modification, are permitted provided that the following conditions
738517Sdfr * are met:
838517Sdfr * 1. Redistributions of source code must retain the above copyright
938517Sdfr *    notice, this list of conditions and the following disclaimer.
1038517Sdfr * 2. Redistributions in binary form must reproduce the above copyright
1138517Sdfr *    notice, this list of conditions and the following disclaimer in the
1238517Sdfr *    documentation and/or other materials provided with the distribution.
1338517Sdfr *
1438517Sdfr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1538517Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1638517Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1738517Sdfr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1838517Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1938517Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2038517Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2138517Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2238517Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2338517Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2438517Sdfr * SUCH DAMAGE.
2538517Sdfr *
2650477Speter * $FreeBSD: head/sys/i386/include/atomic.h 284901 2015-06-28 05:04:08Z kib $
2738517Sdfr */
2838517Sdfr#ifndef _MACHINE_ATOMIC_H_
29147855Sjhb#define	_MACHINE_ATOMIC_H_
3038517Sdfr
31143063Sjoerg#ifndef _SYS_CDEFS_H_
32143063Sjoerg#error this file needs sys/cdefs.h as a prerequisite
33143063Sjoerg#endif
34143063Sjoerg
35254619Sjkim#ifdef _KERNEL
36254619Sjkim#include <machine/md_var.h>
37254619Sjkim#include <machine/specialreg.h>
38254619Sjkim#endif
39254619Sjkim
/*
 * Memory barriers.  The read, write and full barrier all expand to the
 * same statement: a locked addition of zero to the word at the top of the
 * stack.  The "memory" clobber also keeps the compiler from moving memory
 * accesses across the barrier.
 */
#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
43185162Skmacy
4438517Sdfr/*
45165635Sbde * Various simple operations on memory, each of which is atomic in the
46165635Sbde * presence of interrupts and multiple processors.
4738517Sdfr *
48165633Sbde * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
49165633Sbde * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
50165633Sbde * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
51165633Sbde * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
5248797Salc *
53165633Sbde * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
54165633Sbde * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
55165633Sbde * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
56165633Sbde * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
5748797Salc *
58165633Sbde * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
59165633Sbde * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
60165633Sbde * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
61165633Sbde * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
62254617Sjkim * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
63165635Sbde * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
6448797Salc *
65165633Sbde * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
66165633Sbde * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
67165633Sbde * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
68165633Sbde * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
69254617Sjkim * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
70165635Sbde * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
7138517Sdfr */
7238517Sdfr
7348797Salc/*
7449999Salc * The above functions are expanded inline in the statically-linked
7549999Salc * kernel.  Lock prefixes are generated if an SMP kernel is being
7649999Salc * built.
7749999Salc *
7849999Salc * Kernel modules call real functions which are built into the kernel.
7949999Salc * This allows kernel modules to be portable between UP and SMP systems.
8048797Salc */
81147855Sjhb#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
82147855Sjhb#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
83197803Sattiliovoid atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
84197803Sattiliovoid atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
8549999Salc
86208332Sphkint	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
87165633Sbdeu_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
88254617Sjkimint	atomic_testandset_int(volatile u_int *p, u_int v);
8965514Sphk
90284901Skib#define	ATOMIC_LOAD(TYPE)					\
91236456Skibu_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
92236456Skib#define	ATOMIC_STORE(TYPE)					\
93100251Smarkmvoid		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
9471085Sjhb
95254620Sjkimint		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
96254619Sjkimuint64_t	atomic_load_acq_64(volatile uint64_t *);
97254619Sjkimvoid		atomic_store_rel_64(volatile uint64_t *, uint64_t);
98254620Sjkimuint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
99254619Sjkim
100147855Sjhb#else /* !KLD_MODULE && __GNUCLIKE_ASM */
10172358Smarkm
/*
 * MPLOCKED is the prefix pasted in front of the read-modify-write
 * instructions below.  Userland always gets the lock prefix so that one
 * binary runs on both SMP and !SMP systems; inside the kernel it is only
 * emitted when SMP is configured.
 */
#if !defined(_KERNEL) || defined(SMP)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif
11138517Sdfr
/*
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) defines a pair of inline functions
 * operating on a u_TYPE object:
 *
 *	atomic_NAME_TYPE(p, v)		OP applied to *p; flags clobbered
 *	atomic_NAME_barr_TYPE(p, v)	the same, with an additional "memory"
 *					clobber so the compiler does not
 *					reorder memory accesses around it
 *
 * OP is the instruction template (MPLOCKED is pasted in front of it),
 * CONS is the constraint of the source operand and V the expression bound
 * to it.  The asm is __volatile so the compiler cannot discard it.  The
 * trailing "struct __hack" consumes the semicolon that follows each
 * instantiation.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	    : "+m" (*p)					\
	    : CONS (V)					\
	    : "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	    : "+m" (*p)					\
	    : CONS (V)					\
	    : "memory", "cc");				\
}							\
struct __hack
136100327Smarkm
13765514Sphk/*
13865514Sphk * Atomic compare and set, used by the mutex functions
13965514Sphk *
140208332Sphk * if (*dst == expect) *dst = src (all 32 bit words)
14165514Sphk *
14265514Sphk * Returns 0 on failure, non-zero on success
14365514Sphk */
14465514Sphk
145165635Sbde#ifdef CPU_DISABLE_CMPXCHG
146100327Smarkm
147197910Sattiliostatic __inline int
148208332Sphkatomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
149197910Sattilio{
150197910Sattilio	u_char res;
15165514Sphk
152197910Sattilio	__asm __volatile(
153197910Sattilio	"	pushfl ;		"
154197910Sattilio	"	cli ;			"
155254612Sjkim	"	cmpl	%3,%1 ;		"
156197910Sattilio	"	jne	1f ;		"
157197910Sattilio	"	movl	%2,%1 ;		"
158197910Sattilio	"1:				"
159197910Sattilio	"       sete	%0 ;		"
160197910Sattilio	"	popfl ;			"
161197910Sattilio	"# atomic_cmpset_int"
162197910Sattilio	: "=q" (res),			/* 0 */
163254612Sjkim	  "+m" (*dst)			/* 1 */
164197910Sattilio	: "r" (src),			/* 2 */
165254612Sjkim	  "r" (expect)			/* 3 */
166197910Sattilio	: "memory");
167197910Sattilio	return (res);
168197910Sattilio}
169197910Sattilio
170165635Sbde#else /* !CPU_DISABLE_CMPXCHG */
171100327Smarkm
172197910Sattiliostatic __inline int
173208332Sphkatomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
174197910Sattilio{
175197910Sattilio	u_char res;
17665514Sphk
177197910Sattilio	__asm __volatile(
178197910Sattilio	"	" MPLOCKED "		"
179254614Sjkim	"	cmpxchgl %3,%1 ;	"
180197910Sattilio	"       sete	%0 ;		"
181197910Sattilio	"# atomic_cmpset_int"
182254614Sjkim	: "=q" (res),			/* 0 */
183254614Sjkim	  "+m" (*dst),			/* 1 */
184254614Sjkim	  "+a" (expect)			/* 2 */
185254614Sjkim	: "r" (src)			/* 3 */
186216524Skib	: "memory", "cc");
187197910Sattilio	return (res);
188197910Sattilio}
189197910Sattilio
190165635Sbde#endif /* CPU_DISABLE_CMPXCHG */
191100327Smarkm
192150627Sjhb/*
193150627Sjhb * Atomically add the value of v to the integer pointed to by p and return
194150627Sjhb * the previous value of *p.
195150627Sjhb */
196150627Sjhbstatic __inline u_int
197150627Sjhbatomic_fetchadd_int(volatile u_int *p, u_int v)
198150627Sjhb{
199150627Sjhb
200165633Sbde	__asm __volatile(
201165630Sbde	"	" MPLOCKED "		"
202254610Sjkim	"	xaddl	%0,%1 ;		"
203150627Sjhb	"# atomic_fetchadd_int"
204254610Sjkim	: "+r" (v),			/* 0 */
205254612Sjkim	  "+m" (*p)			/* 1 */
206254612Sjkim	: : "cc");
207150627Sjhb	return (v);
208150627Sjhb}
209150627Sjhb
210254617Sjkimstatic __inline int
211254617Sjkimatomic_testandset_int(volatile u_int *p, u_int v)
212254617Sjkim{
213254617Sjkim	u_char res;
214254617Sjkim
215254617Sjkim	__asm __volatile(
216254617Sjkim	"	" MPLOCKED "		"
217254617Sjkim	"	btsl	%2,%1 ;		"
218254617Sjkim	"	setc	%0 ;		"
219254617Sjkim	"# atomic_testandset_int"
220254617Sjkim	: "=q" (res),			/* 0 */
221254617Sjkim	  "+m" (*p)			/* 1 */
222254617Sjkim	: "Ir" (v & 0x1f)		/* 2 */
223254617Sjkim	: "cc");
224254617Sjkim	return (res);
225254617Sjkim}
226254617Sjkim
227236456Skib/*
228236456Skib * We assume that a = b will do atomic loads and stores.  Due to the
229236456Skib * IA32 memory model, a simple store guarantees release semantics.
230236456Skib *
231284901Skib * However, a load may pass a store if they are performed on distinct
232284901Skib * addresses, so for atomic_load_acq we introduce a Store/Load barrier
233284901Skib * before the load in SMP kernels.  We use "lock addl $0,mem", as
234284901Skib * recommended by the AMD Software Optimization Guide, and not mfence.
235284901Skib * In the kernel, we use a private per-cpu cache line as the target
236284901Skib * for the locked addition, to avoid introducing false data
237284901Skib * dependencies.  In userspace, a word at the top of the stack is
238284901Skib * utilized.
239284901Skib *
240284901Skib * For UP kernels, however, the memory of the single processor is
241284901Skib * always consistent, so we only need to stop the compiler from
242284901Skib * reordering accesses in a way that violates the semantics of acquire
243284901Skib * and release.
244236456Skib */
245284901Skib#if defined(_KERNEL)
246236456Skib
247284901Skib/*
248284901Skib * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
249284901Skib *
250284901Skib * The open-coded number is used instead of the symbolic expression to
251284901Skib * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
252284901Skib * An assertion in i386/vm_machdep.c ensures that the value is correct.
253284901Skib */
254284901Skib#define	OFFSETOF_MONITORBUF	0x180
255100327Smarkm
256284901Skib#if defined(SMP)
257284901Skibstatic __inline void
258284901Skib__storeload_barrier(void)
259284901Skib{
260100327Smarkm
261284901Skib	__asm __volatile("lock; addl $0,%%fs:%0"
262284901Skib	    : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
263284901Skib}
264284901Skib#else /* _KERNEL && UP */
/*
 * UP kernel Store/Load barrier: no instruction is emitted, only the
 * compiler is prevented from reordering accesses.
 */
static __inline void
__storeload_barrier(void)
{

	__compiler_membar();
}
271284901Skib#endif /* SMP */
272284901Skib#else /* !_KERNEL */
/*
 * Userland Store/Load barrier: a locked addition of zero to the word at
 * the top of the stack, with a "memory" clobber for the compiler.
 */
static __inline void
__storeload_barrier(void)
{

	__asm __volatile("lock; addl $0,(%%esp)"
	    :
	    :
	    : "memory", "cc");
}
279284901Skib#endif /* _KERNEL*/
280284901Skib
/*
 * ATOMIC_LOAD(TYPE) defines atomic_load_acq_TYPE(p): issue the Store/Load
 * barrier, read *p, then place a compiler barrier after the read so later
 * accesses are not hoisted above it.  Returns the value read.
 */
#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE val;						\
								\
	__storeload_barrier();					\
	val = *p;						\
	__compiler_membar();					\
	return (val);						\
}								\
struct __hack
293100327Smarkm
/*
 * ATOMIC_STORE(TYPE) defines atomic_store_rel_TYPE(p, v): a compiler
 * barrier followed by a plain store of v to *p.
 */
#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
								\
	__compiler_membar();					\
	*p = v;							\
}								\
struct __hack
303100327Smarkm
304254619Sjkim#ifdef _KERNEL
305254619Sjkim
#ifdef WANT_FUNCTIONS
/*
 * Prototypes of the 64-bit helpers defined below, emitted only when
 * WANT_FUNCTIONS is set.
 */

/* Variants that do not use CMPXCHG8B. */
int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);

/* Variants built on CMPXCHG8B. */
int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
#endif
316254619Sjkim
317254619Sjkim/* I486 does not support SMP or CMPXCHG8B. */
318254620Sjkimstatic __inline int
319254620Sjkimatomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
320254620Sjkim{
321254620Sjkim	volatile uint32_t *p;
322254620Sjkim	u_char res;
323254620Sjkim
324254620Sjkim	p = (volatile uint32_t *)dst;
325254620Sjkim	__asm __volatile(
326254620Sjkim	"	pushfl ;		"
327254620Sjkim	"	cli ;			"
328254620Sjkim	"	xorl	%1,%%eax ;	"
329254620Sjkim	"	xorl	%2,%%edx ;	"
330254620Sjkim	"	orl	%%edx,%%eax ;	"
331254620Sjkim	"	jne	1f ;		"
332254620Sjkim	"	movl	%4,%1 ;		"
333254620Sjkim	"	movl	%5,%2 ;		"
334254620Sjkim	"1:				"
335254620Sjkim	"	sete	%3 ;		"
336254620Sjkim	"	popfl"
337254620Sjkim	: "+A" (expect),		/* 0 */
338254620Sjkim	  "+m" (*p),			/* 1 */
339254620Sjkim	  "+m" (*(p + 1)),		/* 2 */
340254620Sjkim	  "=q" (res)			/* 3 */
341254620Sjkim	: "r" ((uint32_t)src),		/* 4 */
342254620Sjkim	  "r" ((uint32_t)(src >> 32))	/* 5 */
343254620Sjkim	: "memory", "cc");
344254620Sjkim	return (res);
345254620Sjkim}
346254620Sjkim
/*
 * 64-bit load without CMPXCHG8B: both 32-bit halves of *p are read into
 * %eax/%edx while interrupts are disabled, then the saved flags are
 * restored.  Returns the 64-bit value read.
 */
static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *w;
	uint64_t val;

	w = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (val)			/* 0: %edx:%eax */
	: "m" (w[0]),			/* 1: low half */
	  "m" (w[1])			/* 2: high half */
	: "memory");
	return (val);
}
366254619Sjkim
/*
 * 64-bit store without CMPXCHG8B: v is passed in %edx:%eax and written to
 * *p as two 32-bit stores while interrupts are disabled.
 */
static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *w;

	w = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (w[0]),			/* 0: low half */
	  "=m" (w[1])			/* 1: high half */
	: "A" (v)			/* 2: %edx:%eax */
	: "memory");
}
384254619Sjkim
/*
 * 64-bit exchange without CMPXCHG8B: with interrupts disabled, the old
 * halves of *p are read into %eax/%edx and the halves of v are then
 * written in their place.  Returns the previous contents of *p.
 */
static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *w;
	uint64_t old;

	w = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (old),			/* 0: %edx:%eax */
	  "+m" (w[0]),			/* 1: low half of *p */
	  "+m" (w[1])			/* 2: high half of *p */
	: "r" ((uint32_t)v),		/* 3: low half of v */
	  "r" ((uint32_t)(v >> 32)));	/* 4: high half of v */
	return (old);
}
407254620Sjkim
408254620Sjkimstatic __inline int
409254620Sjkimatomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
410254620Sjkim{
411254620Sjkim	u_char res;
412254620Sjkim
413254620Sjkim	__asm __volatile(
414254620Sjkim	"	" MPLOCKED "		"
415254620Sjkim	"	cmpxchg8b %1 ;		"
416254620Sjkim	"	sete	%0"
417254620Sjkim	: "=q" (res),			/* 0 */
418254620Sjkim	  "+m" (*dst),			/* 1 */
419254620Sjkim	  "+A" (expect)			/* 2 */
420254620Sjkim	: "b" ((uint32_t)src),		/* 3 */
421254620Sjkim	  "c" ((uint32_t)(src >> 32))	/* 4 */
422254620Sjkim	: "memory", "cc");
423254620Sjkim	return (res);
424254620Sjkim}
425254620Sjkim
426254620Sjkimstatic __inline uint64_t
427254619Sjkimatomic_load_acq_64_i586(volatile uint64_t *p)
428254619Sjkim{
429254619Sjkim	uint64_t res;
430254619Sjkim
431254619Sjkim	__asm __volatile(
432254619Sjkim	"	movl	%%ebx,%%eax ;	"
433254619Sjkim	"	movl	%%ecx,%%edx ;	"
434254619Sjkim	"	" MPLOCKED "		"
435254619Sjkim	"	cmpxchg8b %1"
436254619Sjkim	: "=&A" (res),			/* 0 */
437254619Sjkim	  "+m" (*p)			/* 1 */
438254619Sjkim	: : "memory", "cc");
439254619Sjkim	return (res);
440254619Sjkim}
441254619Sjkim
442254619Sjkimstatic __inline void
443254619Sjkimatomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
444254619Sjkim{
445254619Sjkim
446254619Sjkim	__asm __volatile(
447254619Sjkim	"	movl	%%eax,%%ebx ;	"
448254619Sjkim	"	movl	%%edx,%%ecx ;	"
449254619Sjkim	"1:				"
450254619Sjkim	"	" MPLOCKED "		"
451254619Sjkim	"	cmpxchg8b %0 ;		"
452254619Sjkim	"	jne	1b"
453254619Sjkim	: "+m" (*p),			/* 0 */
454254619Sjkim	  "+A" (v)			/* 1 */
455254619Sjkim	: : "ebx", "ecx", "memory", "cc");
456254619Sjkim}
457254619Sjkim
458254619Sjkimstatic __inline uint64_t
459254620Sjkimatomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
460254620Sjkim{
461254620Sjkim
462254620Sjkim	__asm __volatile(
463254620Sjkim	"	movl	%%eax,%%ebx ;	"
464254620Sjkim	"	movl	%%edx,%%ecx ;	"
465254620Sjkim	"1:				"
466254620Sjkim	"	" MPLOCKED "		"
467254620Sjkim	"	cmpxchg8b %0 ;		"
468254620Sjkim	"	jne	1b"
469254620Sjkim	: "+m" (*p),			/* 0 */
470254620Sjkim	  "+A" (v)			/* 1 */
471254620Sjkim	: : "ebx", "ecx", "memory", "cc");
472254620Sjkim	return (v);
473254620Sjkim}
474254620Sjkim
475254620Sjkimstatic __inline int
476254620Sjkimatomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
477254620Sjkim{
478254620Sjkim
479254620Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
480254620Sjkim		return (atomic_cmpset_64_i386(dst, expect, src));
481254620Sjkim	else
482254620Sjkim		return (atomic_cmpset_64_i586(dst, expect, src));
483254620Sjkim}
484254620Sjkim
485254620Sjkimstatic __inline uint64_t
486254619Sjkimatomic_load_acq_64(volatile uint64_t *p)
487254619Sjkim{
488254619Sjkim
489254619Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
490254619Sjkim		return (atomic_load_acq_64_i386(p));
491254619Sjkim	else
492254619Sjkim		return (atomic_load_acq_64_i586(p));
493254619Sjkim}
494254619Sjkim
495254619Sjkimstatic __inline void
496254619Sjkimatomic_store_rel_64(volatile uint64_t *p, uint64_t v)
497254619Sjkim{
498254619Sjkim
499254619Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
500254619Sjkim		atomic_store_rel_64_i386(p, v);
501254619Sjkim	else
502254619Sjkim		atomic_store_rel_64_i586(p, v);
503254619Sjkim}
504254619Sjkim
505254620Sjkimstatic __inline uint64_t
506254620Sjkimatomic_swap_64(volatile uint64_t *p, uint64_t v)
507254620Sjkim{
508254620Sjkim
509254620Sjkim	if ((cpu_feature & CPUID_CX8) == 0)
510254620Sjkim		return (atomic_swap_64_i386(p, v));
511254620Sjkim	else
512254620Sjkim		return (atomic_swap_64_i586(p, v));
513254620Sjkim}
514254620Sjkim
515254619Sjkim#endif /* _KERNEL */
516254619Sjkim
517147855Sjhb#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
518100251Smarkm
519100251SmarkmATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
520100251SmarkmATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
521100251SmarkmATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
522100251SmarkmATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
52371085Sjhb
524100251SmarkmATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
525100251SmarkmATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
526100251SmarkmATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
527100251SmarkmATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
52871085Sjhb
529100251SmarkmATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
530100251SmarkmATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
531100251SmarkmATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
532100251SmarkmATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
53371085Sjhb
534100251SmarkmATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
535100251SmarkmATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
536100251SmarkmATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
537100251SmarkmATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
53871085Sjhb
539284901Skib#define	ATOMIC_LOADSTORE(TYPE)				\
540284901Skib	ATOMIC_LOAD(TYPE);				\
541284901Skib	ATOMIC_STORE(TYPE)
54271023Sjhb
543284901SkibATOMIC_LOADSTORE(char);
544284901SkibATOMIC_LOADSTORE(short);
545284901SkibATOMIC_LOADSTORE(int);
546284901SkibATOMIC_LOADSTORE(long);
547236456Skib
54871085Sjhb#undef ATOMIC_ASM
549236456Skib#undef ATOMIC_LOAD
550236456Skib#undef ATOMIC_STORE
551284901Skib#undef ATOMIC_LOADSTORE
55267351Sjhb
553165635Sbde#ifndef WANT_FUNCTIONS
554147855Sjhb
555147855Sjhbstatic __inline int
556208332Sphkatomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
557147855Sjhb{
558147855Sjhb
559208332Sphk	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
560147855Sjhb	    (u_int)src));
561147855Sjhb}
562147855Sjhb
563177276Spjdstatic __inline u_long
564177276Spjdatomic_fetchadd_long(volatile u_long *p, u_long v)
565177276Spjd{
566177276Spjd
567177276Spjd	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
568177276Spjd}
569177276Spjd
570254617Sjkimstatic __inline int
571254617Sjkimatomic_testandset_long(volatile u_long *p, u_int v)
572254617Sjkim{
573254617Sjkim
574254617Sjkim	return (atomic_testandset_int((volatile u_int *)p, v));
575254617Sjkim}
576254617Sjkim
577254617Sjkim/* Read the current value and store a new value in the destination. */
578147855Sjhb#ifdef __GNUCLIKE_ASM
579147855Sjhb
580147855Sjhbstatic __inline u_int
581254617Sjkimatomic_swap_int(volatile u_int *p, u_int v)
582147855Sjhb{
583147855Sjhb
584165633Sbde	__asm __volatile(
585147855Sjhb	"	xchgl	%1,%0 ;		"
586254617Sjkim	"# atomic_swap_int"
587254617Sjkim	: "+r" (v),			/* 0 */
588254612Sjkim	  "+m" (*p));			/* 1 */
589254617Sjkim	return (v);
590147855Sjhb}
591147855Sjhb
592147855Sjhbstatic __inline u_long
593254617Sjkimatomic_swap_long(volatile u_long *p, u_long v)
594147855Sjhb{
595147855Sjhb
596254617Sjkim	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
597147855Sjhb}
598147855Sjhb
599147855Sjhb#else /* !__GNUCLIKE_ASM */
600147855Sjhb
601254617Sjkimu_int	atomic_swap_int(volatile u_int *p, u_int v);
602254617Sjkimu_long	atomic_swap_long(volatile u_long *p, u_long v);
603147855Sjhb
604147855Sjhb#endif /* __GNUCLIKE_ASM */
605147855Sjhb
/*
 * Acquire/release names.  For set, clear, add and subtract both the
 * _acq_ and the _rel_ name alias the corresponding _barr_ function; for
 * cmpset both alias the plain function.
 */

/* u_char */
#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char

/* u_short */
#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short

/* u_int */
#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

/* u_long */
#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long

/* Read-and-clear is an exchange with zero. */
#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
648254617Sjkim
/*
 * Fixed-width names.  Each atomic_*_8, _16 and _32 name is an alias for
 * the char, short and int operation respectively.
 */

/* 8-bit: u_char operations. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char

/* 16-bit: u_short operations. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short

/* 32-bit: u_int operations. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int
70371085Sjhb
/*
 * Operations on pointers.
 *
 * Every macro forwards to the u_int operation of the same name, casting
 * the target to (volatile u_int *) and every value argument to (u_int),
 * so callers may pass pointer-typed targets and values without casts of
 * their own.  atomic_store_rel_ptr() casts its value like the others.
 */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
74565514Sphk
746165635Sbde#endif /* !WANT_FUNCTIONS */
747165633Sbde
748165633Sbde#endif /* !_MACHINE_ATOMIC_H_ */
749