/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/i386/include/atomic.h 337041 2018-08-01 12:49:51Z hselasky $
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#ifdef _KERNEL
#include <machine/md_var.h>
#include <machine/specialreg.h>
#endif

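/*
 * Memory barriers (explanatory note): a locked add of zero to the top of
 * the stack acts as a full fence.  It is used instead of mfence, which
 * requires SSE2 and is therefore not available on all i386-class CPUs.
 */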
#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)	(tmp = *(u_int *)(P); *(u_int *)(P) = (V); return (tmp);)
 * atomic_readandclear_int(P)	(tmp = *(u_int *)(P); *(u_int *)(P) = 0; return (tmp);)
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)	(tmp = *(u_long *)(P); *(u_long *)(P) = (V); return (tmp);)
 * atomic_readandclear_long(P)	(tmp = *(u_long *)(P); *(u_long *)(P) = 0; return (tmp);)
 */

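/*
 * Illustrative sketch (hypothetical usage, not part of the API above):
 * manipulating flag bits in a shared word without a lock.
 *
 *	static volatile u_int flags;
 *
 *	atomic_set_int(&flags, 0x1);		set bit 0 atomically
 *	atomic_clear_int(&flags, 0x1);		clear bit 0 atomically
 */
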
/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
	    u_short src);
int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
int	atomic_testandset_int(volatile u_int *p, u_int v);
int	atomic_testandclear_int(volatile u_int *p, u_int v);

#define	ATOMIC_LOAD(TYPE, LOP)					\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define	ATOMIC_STORE(TYPE)					\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64(volatile uint64_t *);
void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif

/*
 * The assembly is marked volatile so that the compiler cannot discard it.
 * GCC aggressively reorders operations, so the barrier variants also carry
 * a "memory" clobber to keep memory accesses from being moved across the
 * barrier.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "memory", "cc");				\
}							\
struct __hack

/*
 * Atomic compare and set, used by the mutex functions.
 *
 * cmpset:
 *	if (*dst == expect)
 *		*dst = src
 *
 * fcmpset:
 *	if (*dst == *expect)
 *		*dst = src
 *	else
 *		*expect = *dst
 *
 * Returns 0 on failure, non-zero on success.
 */
#define	ATOMIC_CMPSET(TYPE, CONS)			\
static __inline int					\
atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	" MPLOCKED "		"		\
	"	cmpxchg	%3,%1 ;		"		\
	"	sete	%0 ;		"		\
	"# atomic_cmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (expect)			/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}							\
							\
static __inline int					\
atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	" MPLOCKED "		"		\
	"	cmpxchg	%3,%1 ;		"		\
	"	sete	%0 ;		"		\
	"# atomic_fcmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (*expect)		/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}

ATOMIC_CMPSET(char, "q");
ATOMIC_CMPSET(short, "r");
ATOMIC_CMPSET(int, "r");

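/*
 * Illustrative sketch (hypothetical usage): a minimal test-and-set spin
 * lock built from atomic_cmpset_int, with a lock word initialized to 0.
 *
 *	static volatile u_int lk;
 *
 *	while (atomic_cmpset_int(&lk, 0, 1) == 0)
 *		continue;			spin until 0 -> 1 succeeds
 *	... critical section ...
 *	atomic_store_rel_int(&lk, 0);		release the lock
 */
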
/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	" MPLOCKED "		"
	"	xaddl	%0,%1 ;		"
	"# atomic_fetchadd_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "cc");
	return (v);
}

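/*
 * Illustrative sketch (hypothetical usage): handing out unique sequence
 * numbers from a shared counter; each caller receives a distinct old value.
 *
 *	static volatile u_int next_id;
 *
 *	u_int id = atomic_fetchadd_int(&next_id, 1);
 */
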
static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

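/*
 * Illustrative sketch (hypothetical usage): claiming bit 0 of a shared
 * word as a one-bit lock; the return value is the bit's previous state.
 *
 *	static volatile u_int busy;
 *
 *	if (atomic_testandset_int(&busy, 0) == 0) {
 *		... bit was clear; we now own it ...
 *		atomic_testandclear_int(&busy, 0);
 *	}
 */
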
/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, loads may pass stores, so for atomic_load_acq we have to
 * ensure a Store/Load barrier to do the load in SMP kernels.  We use
 * "lock cmpxchg" as recommended by the AMD Software Optimization
 * Guide, and not mfence.  For UP kernels, however, the cache of the
 * single processor is always consistent, so we only need to take care
 * of the compiler.
 */
#define	ATOMIC_STORE(TYPE)				\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__compiler_membar();				\
	*p = v;						\
}							\
struct __hack

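/*
 * Illustrative sketch (hypothetical usage): publishing a value with a
 * release store paired with an acquire load in another thread.
 *
 *	Producer:
 *		data = 42;
 *		atomic_store_rel_int(&ready, 1);
 *
 *	Consumer:
 *		while (atomic_load_acq_int(&ready) == 0)
 *			continue;
 *		... data is guaranteed to read as 42 here ...
 */
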
#if defined(_KERNEL) && !defined(SMP)

#define	ATOMIC_LOAD(TYPE, LOP)				\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE tmp;					\
							\
	tmp = *p;					\
	__compiler_membar();				\
	return (tmp);					\
}							\
struct __hack

#else /* !(_KERNEL && !SMP) */

#define	ATOMIC_LOAD(TYPE, LOP)				\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res;					\
							\
	__asm __volatile(MPLOCKED LOP			\
	: "=a" (res),			/* 0 */		\
	  "+m" (*p)			/* 1 */		\
	: : "memory", "cc");				\
	return (res);					\
}							\
struct __hack

#endif /* _KERNEL && !SMP */

#ifdef _KERNEL

#ifdef WANT_FUNCTIONS
int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
#endif

/* I486 does not support SMP or CMPXCHG8B. */
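/*
 * The _i386 variants below therefore fall back to briefly disabling
 * interrupts (pushfl/cli ... popfl), which yields atomicity only because
 * such CPUs are uniprocessor.
 */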
static __inline int
atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	volatile uint32_t *p;
	u_char res;

	p = (volatile uint32_t *)dst;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	xorl	%1,%%eax ;	"
	"	xorl	%2,%%edx ;	"
	"	orl	%%edx,%%eax ;	"
	"	jne	1f ;		"
	"	movl	%4,%1 ;		"
	"	movl	%5,%2 ;		"
	"1:				"
	"	sete	%3 ;		"
	"	popfl"
	: "+A" (expect),		/* 0 */
	  "+m" (*p),			/* 1 */
	  "+m" (*(p + 1)),		/* 2 */
	  "=q" (res)			/* 3 */
	: "r" ((uint32_t)src),		/* 4 */
	  "r" ((uint32_t)(src >> 32))	/* 5 */
	: "memory", "cc");
	return (res);
}

static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (res)			/* 0 */
	: "m" (*q),			/* 1 */
	  "m" (*(q + 1))		/* 2 */
	: "memory");
	return (res);
}

static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (*q),			/* 0 */
	  "=m" (*(q + 1))		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}

static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (res),			/* 0 */
	  "+m" (*q),			/* 1 */
	  "+m" (*(q + 1))		/* 2 */
	: "r" ((uint32_t)v),		/* 3 */
	  "r" ((uint32_t)(v >> 32)));	/* 4 */
	return (res);
}

static __inline int
atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchg8b %1 ;		"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+A" (expect)			/* 2 */
	: "b" ((uint32_t)src),		/* 3 */
	  "c" ((uint32_t)(src >> 32))	/* 4 */
	: "memory", "cc");
	return (res);
}

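/*
 * Read with cmpxchg8b by first copying %ebx:%ecx into %eax:%edx: if the
 * compare hits, memory is rewritten with its own contents; if it misses,
 * the operand is loaded into %edx:%eax.  Either way memory is unchanged
 * and the old 64-bit value ends up in %edx:%eax.
 */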
static __inline uint64_t
atomic_load_acq_64_i586(volatile uint64_t *p)
{
	uint64_t res;

	__asm __volatile(
	"	movl	%%ebx,%%eax ;	"
	"	movl	%%ecx,%%edx ;	"
	"	" MPLOCKED "		"
	"	cmpxchg8b %1"
	: "=&A" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "memory", "cc");
	return (res);
}

static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
}

static __inline uint64_t
atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
	return (v);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_cmpset_64_i386(dst, expect, src));
	else
		return (atomic_cmpset_64_i586(dst, expect, src));
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_load_acq_64_i386(p));
	else
		return (atomic_load_acq_64_i586(p));
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		atomic_store_rel_64_i386(p, v);
	else
		atomic_store_rel_64_i586(p, v);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_swap_64_i386(p, v));
	else
		return (atomic_swap_64_i586(p, v));
}

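/*
 * There is no 64-bit xadd in 32-bit mode, so atomic_fetchadd_64 is
 * emulated with a compare-and-set loop.
 */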
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{

	for (;;) {
		uint64_t t = *p;
		if (atomic_cmpset_64(p, t, t + v))
			return (t);
	}
}

#endif /* _KERNEL */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);

ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
ATOMIC_LOAD(long,  "cmpxchgl %0,%1");

ATOMIC_STORE(char);
ATOMIC_STORE(short);
ATOMIC_STORE(int);
ATOMIC_STORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE

#ifndef WANT_FUNCTIONS

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{

	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
	    (u_int)src));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_int((volatile u_int *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_int((volatile u_int *)p, v));
}

/* Atomically read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM

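/*
 * xchg with a memory operand asserts the bus lock implicitly, so no
 * MPLOCKED prefix is needed here.
 */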
static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_swap_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
}

#else /* !__GNUCLIKE_ASM */

u_int	atomic_swap_int(volatile u_int *p, u_int v);
u_long	atomic_swap_long(volatile u_long *p, u_long v);

#endif /* __GNUCLIKE_ASM */

#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char
#define	atomic_cmpset_acq_char		atomic_cmpset_char
#define	atomic_cmpset_rel_char		atomic_cmpset_char
#define	atomic_fcmpset_acq_char		atomic_fcmpset_char
#define	atomic_fcmpset_rel_char		atomic_fcmpset_char

#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short
#define	atomic_cmpset_acq_short		atomic_cmpset_short
#define	atomic_cmpset_rel_short		atomic_cmpset_short
#define	atomic_fcmpset_acq_short	atomic_fcmpset_short
#define	atomic_fcmpset_rel_short	atomic_fcmpset_short

#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long

#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char
#define	atomic_cmpset_8		atomic_cmpset_char
#define	atomic_cmpset_acq_8	atomic_cmpset_acq_char
#define	atomic_cmpset_rel_8	atomic_cmpset_rel_char
#define	atomic_fcmpset_8	atomic_fcmpset_char
#define	atomic_fcmpset_acq_8	atomic_fcmpset_acq_char
#define	atomic_fcmpset_rel_8	atomic_fcmpset_rel_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short
#define	atomic_cmpset_16	atomic_cmpset_short
#define	atomic_cmpset_acq_16	atomic_cmpset_acq_short
#define	atomic_cmpset_rel_16	atomic_cmpset_rel_short
#define	atomic_fcmpset_16	atomic_fcmpset_short
#define	atomic_fcmpset_acq_16	atomic_fcmpset_acq_short
#define	atomic_fcmpset_rel_16	atomic_fcmpset_rel_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int
#define	atomic_testandclear_32	atomic_testandclear_int

/* Operations on pointers. */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))

#endif /* !WANT_FUNCTIONS */

#endif /* !_MACHINE_ATOMIC_H_ */