/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/i386/include/atomic.h 327195 2017-12-26 10:07:17Z kib $
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#include <sys/atomic_common.h>

#ifdef _KERNEL
#include <machine/md_var.h>
#include <machine/specialreg.h>
#endif

#ifndef __OFFSETOF_MONITORBUF
/*
 * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
 *
 * The open-coded number is used instead of the symbolic expression to
 * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
 * An assertion in i386/vm_machdep.c ensures that the value is correct.
 */
#define	__OFFSETOF_MONITORBUF	0x180

static __inline void
__mbk(void)
{

	__asm __volatile("lock; addl $0,%%fs:%0"
	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
}

static __inline void
__mbu(void)
{

	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
}
#endif
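
/*
 * Illustration (not compiled in): the locked "addl $0" above acts as a
 * full Store/Load barrier.  In the Dekker-style fragment below, without
 * the barrier each CPU's load could pass its own earlier store and both
 * sides could observe 0.  Names are hypothetical; kernel code would use
 * __mbk()/mb() rather than the userland __mbu().
 */
#if 0
static volatile u_int flag0, flag1;

static u_int
dekker_cpu0(void)
{

	flag0 = 1;
	__mbu();		/* order the store before the load below */
	return (flag1);		/* CPU1 runs the mirror image with flag1/flag0 */
}
#endif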

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
 * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
 * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */

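/*
 * Example (illustrative only, not compiled in): typical use of the
 * operations listed above on a hypothetical flags word and counter.
 */
#if 0
static volatile u_int example_flags;
static volatile u_int example_count;

static void
example_basic_ops(void)
{

	atomic_set_int(&example_flags, 0x01);	/* example_flags |= 0x01 */
	atomic_clear_int(&example_flags, 0x01);	/* example_flags &= ~0x01 */
	atomic_add_int(&example_count, 1);	/* example_count += 1 */
	atomic_subtract_int(&example_count, 1);	/* example_count -= 1 */
}
#endif
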
/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
int	atomic_testandset_int(volatile u_int *p, u_int v);
int	atomic_testandclear_int(volatile u_int *p, u_int v);
void	atomic_thread_fence_acq(void);
void	atomic_thread_fence_acq_rel(void);
void	atomic_thread_fence_rel(void);
void	atomic_thread_fence_seq_cst(void);

#define	ATOMIC_LOAD(TYPE)					\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define	ATOMIC_STORE(TYPE)					\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64(volatile uint64_t *);
void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif

/*
 * The assembly is volatilized to keep the compiler from eliminating the
 * code as dead.  GCC aggressively reorders operations, so the "memory"
 * clobber is needed to keep it from moving memory accesses across the
 * barrier-forming variants.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "memory", "cc");				\
}							\
struct __hack

/*
 * Atomic compare and set, used by the mutex functions
 *
 * if (*dst == expect) *dst = src (all 32 bit words)
 *
 * Returns 0 on failure, non-zero on success
 */

static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchgl %3,%1 ;	"
	"       sete	%0 ;		"
	"# atomic_cmpset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+a" (expect)			/* 2 */
	: "r" (src)			/* 3 */
	: "memory", "cc");
	return (res);
}
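
/*
 * Example (illustrative only, not compiled in): a minimal spin-acquire
 * built on atomic_cmpset_int.  The lock word and function name are
 * hypothetical; real mutexes layer much more on top of this.
 */
#if 0
static volatile u_int example_lock;	/* 0 == free, 1 == held */

static void
example_spin_acquire(void)
{

	while (atomic_cmpset_int(&example_lock, 0, 1) == 0)
		;	/* spin until the 0 -> 1 transition succeeds */
}
#endif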

static __inline int
atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchgl %3,%1 ;	"
	"       sete	%0 ;		"
	"# atomic_fcmpset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+a" (*expect)		/* 2 */
	: "r" (src)			/* 3 */
	: "memory", "cc");
	return (res);
}
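
/*
 * Example (illustrative only, not compiled in): unlike atomic_cmpset_int,
 * atomic_fcmpset_int writes the observed value back through "expect" on
 * failure, so a CAS loop needs no separate reload.  The saturating
 * counter below is hypothetical, for exposition.
 */
#if 0
static void
example_saturating_inc(volatile u_int *p)
{
	u_int old;

	old = *p;
	do {
		if (old == 0xffffffffu)
			return;		/* saturated; do not wrap */
	} while (!atomic_fcmpset_int(p, &old, old + 1));
}
#endif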

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	" MPLOCKED "		"
	"	xaddl	%0,%1 ;		"
	"# atomic_fetchadd_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "cc");
	return (v);
}
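
/*
 * Example (illustrative only, not compiled in): atomic_fetchadd_int is a
 * natural fit for handing out unique values, e.g. ticket numbers.  Names
 * are hypothetical.
 */
#if 0
static volatile u_int example_next_ticket;

static u_int
example_take_ticket(void)
{

	/* Returns the pre-increment value, so each caller gets its own. */
	return (atomic_fetchadd_int(&example_next_ticket, 1));
}
#endif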

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}
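
/*
 * Example (illustrative only, not compiled in): both functions return the
 * bit's previous value, so testandset doubles as a one-bit try-lock.  Note
 * the bit index is masked with 0x1f above, i.e. taken modulo 32.  Names
 * are hypothetical.
 */
#if 0
static volatile u_int example_bits;

static int
example_try_lock_bit(u_int bit)
{

	/* Succeeds only if we were the one to flip the bit 0 -> 1. */
	return (atomic_testandset_int(&example_bits, bit) == 0);
}
#endif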

/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, a load may pass a store if they are performed on distinct
 * addresses, so we need a Store/Load barrier for sequentially
 * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
 * Store/Load barrier, as recommended by the AMD Software Optimization
 * Guide, and not mfence.  In the kernel, we use a private per-cpu
 * cache line for "mem", to avoid introducing false data
 * dependencies.  In user space, we use the word at the top of the
 * stack.
 *
 * For UP kernels, however, the memory of the single processor is
 * always consistent, so we only need to stop the compiler from
 * reordering accesses in a way that violates the semantics of acquire
 * and release.
 */

#if defined(_KERNEL)
#if defined(SMP)
#define	__storeload_barrier()	__mbk()
#else /* _KERNEL && UP */
#define	__storeload_barrier()	__compiler_membar()
#endif /* SMP */
#else /* !_KERNEL */
#define	__storeload_barrier()	__mbu()
#endif /* _KERNEL */

#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE res;						\
								\
	res = *p;						\
	__compiler_membar();					\
	return (res);						\
}								\
struct __hack

#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
								\
	__compiler_membar();					\
	*p = v;							\
}								\
struct __hack

static __inline void
atomic_thread_fence_acq(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_rel(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	__storeload_barrier();
}
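
/*
 * Example (illustrative only, not compiled in): the canonical use of the
 * acquire/release pairs defined above is flag-guarded data handoff.  On
 * x86 both compile to nothing more than a compiler fence, yet the
 * pairing is still required for correctness.  Names are hypothetical.
 */
#if 0
static u_int example_data;
static volatile u_int example_ready;

static void
example_publish(u_int d)
{

	example_data = d;
	atomic_store_rel_int(&example_ready, 1);	/* data visible first */
}

static int
example_consume(u_int *dp)
{

	if (atomic_load_acq_int(&example_ready) == 0)	/* pairs with above */
		return (0);
	*dp = example_data;
	return (1);
}
#endif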

#ifdef _KERNEL

#ifdef WANT_FUNCTIONS
int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
#endif

/* I486 does not support SMP or CMPXCHG8B. */
static __inline int
atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	volatile uint32_t *p;
	u_char res;

	p = (volatile uint32_t *)dst;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	xorl	%1,%%eax ;	"
	"	xorl	%2,%%edx ;	"
	"	orl	%%edx,%%eax ;	"
	"	jne	1f ;		"
	"	movl	%4,%1 ;		"
	"	movl	%5,%2 ;		"
	"1:				"
	"	sete	%3 ;		"
	"	popfl"
	: "+A" (expect),		/* 0 */
	  "+m" (*p),			/* 1 */
	  "+m" (*(p + 1)),		/* 2 */
	  "=q" (res)			/* 3 */
	: "r" ((uint32_t)src),		/* 4 */
	  "r" ((uint32_t)(src >> 32))	/* 5 */
	: "memory", "cc");
	return (res);
}

static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (res)			/* 0 */
	: "m" (*q),			/* 1 */
	  "m" (*(q + 1))		/* 2 */
	: "memory");
	return (res);
}

static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (*q),			/* 0 */
	  "=m" (*(q + 1))		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}

static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (res),			/* 0 */
	  "+m" (*q),			/* 1 */
	  "+m" (*(q + 1))		/* 2 */
	: "r" ((uint32_t)v),		/* 3 */
	  "r" ((uint32_t)(v >> 32)));	/* 4 */
	return (res);
}

static __inline int
atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchg8b %1 ;		"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+A" (expect)			/* 2 */
	: "b" ((uint32_t)src),		/* 3 */
	  "c" ((uint32_t)(src >> 32))	/* 4 */
	: "memory", "cc");
	return (res);
}

static __inline uint64_t
atomic_load_acq_64_i586(volatile uint64_t *p)
{
	uint64_t res;

	__asm __volatile(
	"	movl	%%ebx,%%eax ;	"
	"	movl	%%ecx,%%edx ;	"
	"	" MPLOCKED "		"
	"	cmpxchg8b %1"
	: "=&A" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "memory", "cc");
	return (res);
}

static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
}

static __inline uint64_t
atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
	return (v);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_cmpset_64_i386(dst, expect, src));
	else
		return (atomic_cmpset_64_i586(dst, expect, src));
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_load_acq_64_i386(p));
	else
		return (atomic_load_acq_64_i586(p));
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		atomic_store_rel_64_i386(p, v);
	else
		atomic_store_rel_64_i586(p, v);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_swap_64_i386(p, v));
	else
		return (atomic_swap_64_i586(p, v));
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{

	for (;;) {
		uint64_t t = *p;
		if (atomic_cmpset_64(p, t, t + v))
			return (t);
	}
}
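
/*
 * Example (illustrative only, not compiled in): the dispatch wrappers
 * above pick the i586 CMPXCHG8B path when CPUID advertises it (CPUID_CX8)
 * and fall back to the interrupt-disabling i486 path otherwise, so
 * callers can use 64-bit atomics without caring which CPU they run on.
 * The counter below is hypothetical.
 */
#if 0
static volatile uint64_t example_bytes;

static uint64_t
example_account(uint64_t n)
{

	/* Works on both i486 and i586+; returns the pre-add value. */
	return (atomic_fetchadd_64(&example_bytes, n));
}
#endif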

#endif /* _KERNEL */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);

#define	ATOMIC_LOADSTORE(TYPE)				\
	ATOMIC_LOAD(TYPE);				\
	ATOMIC_STORE(TYPE)

ATOMIC_LOADSTORE(char);
ATOMIC_LOADSTORE(short);
ATOMIC_LOADSTORE(int);
ATOMIC_LOADSTORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
#undef ATOMIC_LOADSTORE

#ifndef WANT_FUNCTIONS

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{

	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
	    (u_int)src));
}
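
/*
 * The fcmpset acq/rel aliases below expect an atomic_fcmpset_long; on
 * i386 a thin wrapper over the int variant, in the same style as
 * atomic_cmpset_long above, supplies it.
 */
static __inline int
atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src)
{

	return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect,
	    (u_int)src));
}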

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_int((volatile u_int *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_int((volatile u_int *)p, v));
}

/* Read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM

static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_swap_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
}

#else /* !__GNUCLIKE_ASM */

u_int	atomic_swap_int(volatile u_int *p, u_int v);
u_long	atomic_swap_long(volatile u_long *p, u_long v);

#endif /* __GNUCLIKE_ASM */
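
/*
 * Example (illustrative only, not compiled in): xchgl locks the bus
 * implicitly, which is why atomic_swap_int needs no MPLOCKED prefix.
 * Swap-with-zero is how atomic_readandclear_int() below is built, e.g.
 * to drain a pending-events word.  Names are hypothetical.
 */
#if 0
static volatile u_int example_pending;

static u_int
example_drain_events(void)
{

	/* Atomically grab all pending bits and reset the word to 0. */
	return (atomic_swap_int(&example_pending, 0));
}
#endif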

#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char

#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short

#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long

#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int
#define	atomic_testandclear_32	atomic_testandclear_int

/* Operations on pointers. */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
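
/*
 * Example (illustrative only, not compiled in): the pointer variants make
 * simple lock-free structures straightforward, e.g. pushing onto a singly
 * linked list head.  Push is ABA-safe; a matching lock-free pop is not
 * shown because it needs more care.  Types and names are hypothetical.
 */
#if 0
struct example_node {
	struct example_node *next;
};

static struct example_node *example_head;

static void
example_push(struct example_node *n)
{
	struct example_node *h;

	do {
		h = example_head;	/* snapshot; cmpset validates it */
		n->next = h;
	} while (atomic_cmpset_ptr(&example_head, h, n) == 0);
}
#endif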

#endif /* !WANT_FUNCTIONS */

#if defined(_KERNEL)
#define	mb()	__mbk()
#define	wmb()	__mbk()
#define	rmb()	__mbk()
#else
#define	mb()	__mbu()
#define	wmb()	__mbu()
#define	rmb()	__mbu()
#endif

#endif /* !_MACHINE_ATOMIC_H_ */