/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/i386/include/atomic.h 338588 2018-09-11 15:56:06Z hselasky $
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#include <sys/atomic_common.h>

#ifdef _KERNEL
#include <machine/md_var.h>
#include <machine/specialreg.h>
#endif

#ifndef __OFFSETOF_MONITORBUF
/*
 * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
 *
 * The open-coded number is used instead of the symbolic expression to
 * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
 * An assertion in i386/vm_machdep.c ensures that the value is correct.
 */
#define	__OFFSETOF_MONITORBUF	0x180
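
/*
 * A sketch of the kind of compile-time check that keeps the open-coded
 * offset honest (the actual assertion lives in i386/i386/vm_machdep.c
 * and may be spelled differently):
 *
 *	CTASSERT(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf));
 */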

static __inline void
__mbk(void)
{

	__asm __volatile("lock; addl $0,%%fs:%0"
	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
}

static __inline void
__mbu(void)
{

	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
}
#endif

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
 * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
 * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */
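
/*
 * Illustrative usage (the variable name is made up):
 *
 *	static volatile u_int flags;
 *	u_int old;
 *
 *	atomic_set_int(&flags, 0x01);		sets bit 0 atomically
 *	atomic_clear_int(&flags, 0x01);		clears it again
 *	old = atomic_readandclear_int(&flags);	returns the old value, zeroes flags
 */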

/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
	    u_short src);
int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
int	atomic_testandset_int(volatile u_int *p, u_int v);
int	atomic_testandclear_int(volatile u_int *p, u_int v);
void	atomic_thread_fence_acq(void);
void	atomic_thread_fence_acq_rel(void);
void	atomic_thread_fence_rel(void);
void	atomic_thread_fence_seq_cst(void);

#define	ATOMIC_LOAD(TYPE)					\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define	ATOMIC_STORE(TYPE)					\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_fcmpset_64(volatile uint64_t *, uint64_t *, uint64_t);
uint64_t	atomic_load_acq_64(volatile uint64_t *);
void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);
void		atomic_add_64(volatile uint64_t *, uint64_t);
void		atomic_subtract_64(volatile uint64_t *, uint64_t);

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif

/*
 * The assembly statements are marked volatile so that the compiler cannot
 * remove them.  GCC also reorders operations aggressively, so the barrier
 * variants additionally clobber memory to prevent such reordering.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "memory", "cc");				\
}							\
struct __hack
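
/*
 * For reference, ATOMIC_ASM(set, int, "orl %1,%0", "ir", v) generates
 * (assuming MPLOCKED expands to "lock ; "):
 *
 *	static __inline void
 *	atomic_set_int(volatile u_int *p, u_int v)
 *	{
 *		__asm __volatile("lock ; orl %1,%0"
 *		: "+m" (*p) : "ir" (v) : "cc");
 *	}
 *
 * plus atomic_set_barr_int(), which is identical except that it also
 * clobbers "memory" so the compiler cannot move other accesses across it.
 */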

/*
 * Atomic compare and set, used by the mutex functions.
 *
 * cmpset:
 *	if (*dst == expect)
 *		*dst = src
 *
 * fcmpset:
 *	if (*dst == *expect)
 *		*dst = src
 *	else
 *		*expect = *dst
 *
 * Returns 0 on failure, non-zero on success.
 */
#define	ATOMIC_CMPSET(TYPE, CONS)			\
static __inline int					\
atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	" MPLOCKED "		"		\
	"	cmpxchg	%3,%1 ;		"		\
	"	sete	%0 ;		"		\
	"# atomic_cmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (expect)			/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}							\
							\
static __inline int					\
atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	" MPLOCKED "		"		\
	"	cmpxchg	%3,%1 ;		"		\
	"	sete	%0 ;		"		\
	"# atomic_fcmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (*expect)		/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}

ATOMIC_CMPSET(char, "q");
ATOMIC_CMPSET(short, "r");
ATOMIC_CMPSET(int, "r");
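
/*
 * A typical lock-free update loop built on fcmpset (illustrative sketch;
 * the counter is made up).  On failure, fcmpset refreshes "old" with the
 * current value, so the loop simply retries:
 *
 *	static volatile u_int counter;
 *	u_int old;
 *
 *	old = counter;
 *	while (atomic_fcmpset_int(&counter, &old, old + 1) == 0)
 *		continue;
 */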

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	" MPLOCKED "		"
	"	xaddl	%0,%1 ;		"
	"# atomic_fetchadd_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "cc");
	return (v);
}
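
/*
 * Illustrative use (the names are made up): hand out strictly increasing
 * sequence numbers without a lock; the pre-increment value is returned.
 *
 *	static volatile u_int seq;
 *	u_int mine;
 *
 *	mine = atomic_fetchadd_int(&seq, 1);
 */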

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}
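
/*
 * Illustrative sketch (the names are made up): run one-time setup exactly
 * once.  testandset returns the previous value of the bit, so only the
 * first caller observes 0:
 *
 *	static volatile u_int once;
 *
 *	if (atomic_testandset_int(&once, 0) == 0)
 *		do_one_time_setup();
 */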

/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, a load may pass a store if they are performed on distinct
 * addresses, so we need a Store/Load barrier for sequentially
 * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
 * Store/Load barrier, as recommended by the AMD Software Optimization
 * Guide, and not mfence.  In the kernel, we use a private per-cpu
 * cache line for "mem", to avoid introducing false data
 * dependencies.  In user space, we use the word at the top of the
 * stack.
 *
 * For UP kernels, however, the memory of the single processor is
 * always consistent, so we only need to stop the compiler from
 * reordering accesses in a way that violates the semantics of acquire
 * and release.
 */

#if defined(_KERNEL)
#if defined(SMP)
#define	__storeload_barrier()	__mbk()
#else /* _KERNEL && UP */
#define	__storeload_barrier()	__compiler_membar()
#endif /* SMP */
#else /* !_KERNEL */
#define	__storeload_barrier()	__mbu()
#endif /* _KERNEL */
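
/*
 * Why a Store/Load barrier is needed for a sequentially consistent fence
 * (illustrative sketch; x, y, r0 and r1 are made up and start at zero):
 *
 *	CPU 0:					CPU 1:
 *		x = 1;					y = 1;
 *		atomic_thread_fence_seq_cst();		atomic_thread_fence_seq_cst();
 *		r0 = y;					r1 = x;
 *
 * Without the fences, each store may still sit in its CPU's store buffer
 * when the other CPU performs its load, so r0 == 0 && r1 == 0 is a
 * possible outcome; the "lock addl" barrier rules it out.
 */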

#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE res;						\
								\
	res = *p;						\
	__compiler_membar();					\
	return (res);						\
}								\
struct __hack

#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
								\
	__compiler_membar();					\
	*p = v;							\
}								\
struct __hack

static __inline void
atomic_thread_fence_acq(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_rel(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	__storeload_barrier();
}

#ifdef _KERNEL

#ifdef WANT_FUNCTIONS
int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_fcmpset_64_i386(volatile uint64_t *, uint64_t *, uint64_t);
int		atomic_fcmpset_64_i586(volatile uint64_t *, uint64_t *, uint64_t);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
#endif

/*
 * I486 does not support SMP or CMPXCHG8B.  Fall back to briefly disabling
 * interrupts; on a uniprocessor that is sufficient to make the 64-bit
 * read-modify-write sequences below atomic.
 */
static __inline int
atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	volatile uint32_t *p;
	u_char res;

	p = (volatile uint32_t *)dst;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	xorl	%1,%%eax ;	"
	"	xorl	%2,%%edx ;	"
	"	orl	%%edx,%%eax ;	"
	"	jne	1f ;		"
	"	movl	%4,%1 ;		"
	"	movl	%5,%2 ;		"
	"1:				"
	"	sete	%3 ;		"
	"	popfl"
	: "+A" (expect),		/* 0 */
	  "+m" (*p),			/* 1 */
	  "+m" (*(p + 1)),		/* 2 */
	  "=q" (res)			/* 3 */
	: "r" ((uint32_t)src),		/* 4 */
	  "r" ((uint32_t)(src >> 32))	/* 5 */
	: "memory", "cc");
	return (res);
}

static __inline int
atomic_fcmpset_64_i386(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{

	if (atomic_cmpset_64_i386(dst, *expect, src)) {
		return (1);
	} else {
		*expect = *dst;
		return (0);
	}
}

static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (res)			/* 0 */
	: "m" (*q),			/* 1 */
	  "m" (*(q + 1))		/* 2 */
	: "memory");
	return (res);
}

static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (*q),			/* 0 */
	  "=m" (*(q + 1))		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}

static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (res),			/* 0 */
	  "+m" (*q),			/* 1 */
	  "+m" (*(q + 1))		/* 2 */
	: "r" ((uint32_t)v),		/* 3 */
	  "r" ((uint32_t)(v >> 32)));	/* 4 */
	return (res);
}

static __inline int
atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchg8b %1 ;		"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+A" (expect)			/* 2 */
	: "b" ((uint32_t)src),		/* 3 */
	  "c" ((uint32_t)(src >> 32))	/* 4 */
	: "memory", "cc");
	return (res);
}

static __inline int
atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchg8b %1 ;		"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+A" (*expect)		/* 2 */
	: "b" ((uint32_t)src),		/* 3 */
	  "c" ((uint32_t)(src >> 32))	/* 4 */
	: "memory", "cc");
	return (res);
}

static __inline uint64_t
atomic_load_acq_64_i586(volatile uint64_t *p)
{
	uint64_t res;

	__asm __volatile(
	"	movl	%%ebx,%%eax ;	"
	"	movl	%%ecx,%%edx ;	"
	"	" MPLOCKED "		"
	"	cmpxchg8b %1"
	: "=&A" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "memory", "cc");
	return (res);
}

static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
}

static __inline uint64_t
atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
	return (v);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_cmpset_64_i386(dst, expect, src));
	else
		return (atomic_cmpset_64_i586(dst, expect, src));
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_fcmpset_64_i386(dst, expect, src));
	else
		return (atomic_fcmpset_64_i586(dst, expect, src));
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_load_acq_64_i386(p));
	else
		return (atomic_load_acq_64_i586(p));
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		atomic_store_rel_64_i386(p, v);
	else
		atomic_store_rel_64_i586(p, v);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_swap_64_i386(p, v));
	else
		return (atomic_swap_64_i586(p, v));
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{

	for (;;) {
		uint64_t t = *p;
		if (atomic_cmpset_64(p, t, t + v))
			return (t);
	}
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t t;

	for (;;) {
		t = *p;
		if (atomic_cmpset_64(p, t, t + v))
			break;
	}
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t t;

	for (;;) {
		t = *p;
		if (atomic_cmpset_64(p, t, t - v))
			break;
	}
}
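
/*
 * Illustrative sketch (the names are made up): a 64-bit statistics counter
 * that is updated from any context and sampled elsewhere, working even on
 * CPUs without CMPXCHG8B:
 *
 *	static volatile uint64_t bytes_in;
 *	uint64_t total;
 *
 *	atomic_add_64(&bytes_in, (uint64_t)len);
 *	total = atomic_load_acq_64(&bytes_in);
 */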

#endif /* _KERNEL */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);

#define	ATOMIC_LOADSTORE(TYPE)				\
	ATOMIC_LOAD(TYPE);				\
	ATOMIC_STORE(TYPE)

ATOMIC_LOADSTORE(char);
ATOMIC_LOADSTORE(short);
ATOMIC_LOADSTORE(int);
ATOMIC_LOADSTORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
#undef ATOMIC_LOADSTORE

#ifndef WANT_FUNCTIONS

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{

	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
	    (u_int)src));
}

static __inline int
atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src)
{

	return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect,
	    (u_int)src));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_int((volatile u_int *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_int((volatile u_int *)p, v));
}

/* Read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM

static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_swap_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}
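
/*
 * Illustrative sketch (the names are made up): drain a set of pending-work
 * bits, fetching and clearing them in a single step:
 *
 *	static volatile u_int pending;
 *	u_int work;
 *
 *	work = atomic_swap_int(&pending, 0);
 */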

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
}

#else /* !__GNUCLIKE_ASM */

u_int	atomic_swap_int(volatile u_int *p, u_int v);
u_long	atomic_swap_long(volatile u_long *p, u_long v);

#endif /* __GNUCLIKE_ASM */

#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char
#define	atomic_cmpset_acq_char		atomic_cmpset_char
#define	atomic_cmpset_rel_char		atomic_cmpset_char
#define	atomic_fcmpset_acq_char		atomic_fcmpset_char
#define	atomic_fcmpset_rel_char		atomic_fcmpset_char

#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short
#define	atomic_cmpset_acq_short		atomic_cmpset_short
#define	atomic_cmpset_rel_short		atomic_cmpset_short
#define	atomic_fcmpset_acq_short	atomic_fcmpset_short
#define	atomic_fcmpset_rel_short	atomic_fcmpset_short

#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long

#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char
#define	atomic_cmpset_8		atomic_cmpset_char
#define	atomic_cmpset_acq_8	atomic_cmpset_acq_char
#define	atomic_cmpset_rel_8	atomic_cmpset_rel_char
#define	atomic_fcmpset_8	atomic_fcmpset_char
#define	atomic_fcmpset_acq_8	atomic_fcmpset_acq_char
#define	atomic_fcmpset_rel_8	atomic_fcmpset_rel_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short
#define	atomic_cmpset_16	atomic_cmpset_short
#define	atomic_cmpset_acq_16	atomic_cmpset_acq_short
#define	atomic_cmpset_rel_16	atomic_cmpset_rel_short
#define	atomic_fcmpset_16	atomic_fcmpset_short
#define	atomic_fcmpset_acq_16	atomic_fcmpset_acq_short
#define	atomic_fcmpset_rel_16	atomic_fcmpset_rel_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int
#define	atomic_testandclear_32	atomic_testandclear_int

/* Operations on 64-bit quad words. */
#define	atomic_cmpset_acq_64 atomic_cmpset_64
#define	atomic_cmpset_rel_64 atomic_cmpset_64
#define	atomic_fcmpset_acq_64 atomic_fcmpset_64
#define	atomic_fcmpset_rel_64 atomic_fcmpset_64
#define	atomic_fetchadd_acq_64	atomic_fetchadd_64
#define	atomic_fetchadd_rel_64	atomic_fetchadd_64
#define	atomic_add_acq_64 atomic_add_64
#define	atomic_add_rel_64 atomic_add_64
#define	atomic_subtract_acq_64 atomic_subtract_64
#define	atomic_subtract_rel_64 atomic_subtract_64

/* Operations on pointers. */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
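
/*
 * Illustrative sketch (the names are made up): publish a lazily allocated
 * object exactly once; the loser of the race frees its copy.
 *
 *	static struct thing *global_thing;
 *	struct thing *nt;
 *
 *	nt = alloc_thing();
 *	if (atomic_cmpset_ptr(&global_thing, NULL, nt) == 0)
 *		free_thing(nt);
 */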

#endif /* !WANT_FUNCTIONS */

#if defined(_KERNEL)
#define	mb()	__mbk()
#define	wmb()	__mbk()
#define	rmb()	__mbk()
#else
#define	mb()	__mbu()
#define	wmb()	__mbu()
#define	rmb()	__mbu()
#endif

#endif /* !_MACHINE_ATOMIC_H_ */