/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/i386/include/atomic.h 337893 2018-08-16 08:05:04Z hselasky $
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#include <sys/atomic_common.h>

#ifdef _KERNEL
#include <machine/md_var.h>
#include <machine/specialreg.h>
#endif

#ifndef __OFFSETOF_MONITORBUF
/*
 * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
 *
 * The open-coded number is used instead of the symbolic expression to
 * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
 * An assertion in i386/vm_machdep.c ensures that the value is correct.
 */
#define	__OFFSETOF_MONITORBUF	0x180

static __inline void
__mbk(void)
{

	__asm __volatile("lock; addl $0,%%fs:%0"
	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
}

static __inline void
__mbu(void)
{

	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
}
#endif

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
 * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
 * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */
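
/*
 * For example (an illustrative sketch, not part of the interface; the
 * variable name is hypothetical), two CPUs may manipulate independent
 * bits of a shared flags word without a lock, because each
 * read-modify-write below is a single atomic instruction:
 *
 *	static volatile u_int	softint_pending;
 *
 *	atomic_set_int(&softint_pending, 1 << irq);	(set a bit)
 *	atomic_clear_int(&softint_pending, 1 << irq);	(clear it again)
 */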

/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
	    u_short src);
int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
int	atomic_testandset_int(volatile u_int *p, u_int v);
int	atomic_testandclear_int(volatile u_int *p, u_int v);
void	atomic_thread_fence_acq(void);
void	atomic_thread_fence_acq_rel(void);
void	atomic_thread_fence_rel(void);
void	atomic_thread_fence_seq_cst(void);

#define	ATOMIC_LOAD(TYPE)					\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define	ATOMIC_STORE(TYPE)					\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_fcmpset_64(volatile uint64_t *, uint64_t *, uint64_t);
uint64_t	atomic_load_acq_64(volatile uint64_t *);
void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);
void		atomic_add_64(volatile uint64_t *, uint64_t);
void		atomic_subtract_64(volatile uint64_t *, uint64_t);

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif

/*
 * The assembly is marked volatile so that the compiler cannot discard
 * it as dead code.  GCC aggressively reorders operations around asm
 * statements, so the variants that act as memory barriers must also
 * declare a "memory" clobber to inhibit that reordering.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "memory", "cc");				\
}							\
struct __hack

/*
 * Atomic compare and set, used by the mutex functions.
 *
 * cmpset:
 *	if (*dst == expect)
 *		*dst = src
 *
 * fcmpset:
 *	if (*dst == *expect)
 *		*dst = src
 *	else
 *		*expect = *dst
 *
 * Returns 0 on failure, non-zero on success.
 */
#define	ATOMIC_CMPSET(TYPE, CONS)			\
static __inline int					\
atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	" MPLOCKED "		"		\
	"	cmpxchg	%3,%1 ;		"		\
	"	sete	%0 ;		"		\
	"# atomic_cmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (expect)			/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}							\
							\
static __inline int					\
atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	" MPLOCKED "		"		\
	"	cmpxchg	%3,%1 ;		"		\
	"	sete	%0 ;		"		\
	"# atomic_fcmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (*expect)		/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}

ATOMIC_CMPSET(char, "q");
ATOMIC_CMPSET(short, "r");
ATOMIC_CMPSET(int, "r");
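
/*
 * A typical consumer drives fcmpset in a loop; on failure *expect is
 * refreshed automatically, saving one memory read per retry compared
 * with cmpset.  An illustrative sketch (the function and variable
 * names here are hypothetical):
 *
 *	static __inline void
 *	counter_add_clamped(volatile u_int *cnt, u_int lim)
 *	{
 *		u_int old;
 *
 *		old = *cnt;
 *		while (old < lim &&
 *		    !atomic_fcmpset_int(cnt, &old, old + 1))
 *			continue;	(old was reloaded on failure)
 *	}
 */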

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	" MPLOCKED "		"
	"	xaddl	%0,%1 ;		"
	"# atomic_fetchadd_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "cc");
	return (v);
}
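
/*
 * Because the previous value is returned, fetchadd can hand out unique
 * values with a single locked instruction.  For example (sketch only;
 * next_ticket is a hypothetical variable):
 *
 *	static volatile u_int	next_ticket;
 *
 *	my_ticket = atomic_fetchadd_int(&next_ticket, 1);
 *
 * Each caller receives a distinct ticket even when CPUs race.
 */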

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, a load may pass a store if they are performed on distinct
 * addresses, so we need a Store/Load barrier for sequentially
 * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
 * Store/Load barrier, as recommended by the AMD Software Optimization
 * Guide, and not mfence.  In the kernel, we use a private per-cpu
 * cache line for "mem", to avoid introducing false data
 * dependencies.  In user space, we use the word at the top of the
 * stack.
 *
 * For UP kernels, however, the memory of the single processor is
 * always consistent, so we only need to stop the compiler from
 * reordering accesses in a way that violates the semantics of acquire
 * and release.
 */

#if defined(_KERNEL)
#if defined(SMP)
#define	__storeload_barrier()	__mbk()
#else /* _KERNEL && UP */
#define	__storeload_barrier()	__compiler_membar()
#endif /* SMP */
#else /* !_KERNEL */
#define	__storeload_barrier()	__mbu()
#endif /* _KERNEL */
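
/*
 * To see why the Store/Load barrier matters, consider a Dekker-style
 * handshake (an illustrative sketch; flag_a and flag_b are
 * hypothetical):
 *
 *	CPU A:				CPU B:
 *	flag_a = 1;			flag_b = 1;
 *	atomic_thread_fence_seq_cst();	atomic_thread_fence_seq_cst();
 *	if (flag_b == 0) ...		if (flag_a == 0) ...
 *
 * Without the fence, each CPU's load may execute before its own store
 * becomes globally visible, and both CPUs may observe 0; the locked
 * add prevents that outcome.
 */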

#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE res;						\
								\
	res = *p;						\
	__compiler_membar();					\
	return (res);						\
}								\
struct __hack

#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
								\
	__compiler_membar();					\
	*p = v;							\
}								\
struct __hack
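
/*
 * On x86 these expand to plain loads and stores plus compiler
 * barriers, which is all the hardware model requires for acquire and
 * release.  A typical producer/consumer pairing (sketch only; data and
 * ready are hypothetical):
 *
 *	producer:	data = compute();
 *			atomic_store_rel_int(&ready, 1);
 *
 *	consumer:	while (atomic_load_acq_int(&ready) == 0)
 *				continue;
 *			use(data);	(guaranteed to see the new data)
 */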

static __inline void
atomic_thread_fence_acq(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_rel(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	__compiler_membar();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	__storeload_barrier();
}

#ifdef _KERNEL

#ifdef WANT_FUNCTIONS
int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
#endif

/* I486 does not support SMP or CMPXCHG8B. */
static __inline int
atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	volatile uint32_t *p;
	u_char res;

	p = (volatile uint32_t *)dst;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	xorl	%1,%%eax ;	"
	"	xorl	%2,%%edx ;	"
	"	orl	%%edx,%%eax ;	"
	"	jne	1f ;		"
	"	movl	%4,%1 ;		"
	"	movl	%5,%2 ;		"
	"1:				"
	"	sete	%3 ;		"
	"	popfl"
	: "+A" (expect),		/* 0 */
	  "+m" (*p),			/* 1 */
	  "+m" (*(p + 1)),		/* 2 */
	  "=q" (res)			/* 3 */
	: "r" ((uint32_t)src),		/* 4 */
	  "r" ((uint32_t)(src >> 32))	/* 5 */
	: "memory", "cc");
	return (res);
}

static __inline int
atomic_fcmpset_64_i386(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{

	if (atomic_cmpset_64_i386(dst, *expect, src)) {
		return (1);
	} else {
		*expect = *dst;
		return (0);
	}
}

static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (res)			/* 0 */
	: "m" (*q),			/* 1 */
	  "m" (*(q + 1))		/* 2 */
	: "memory");
	return (res);
}

static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (*q),			/* 0 */
	  "=m" (*(q + 1))		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}

static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *q;
	uint64_t res;

	q = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (res),			/* 0 */
	  "+m" (*q),			/* 1 */
	  "+m" (*(q + 1))		/* 2 */
	: "r" ((uint32_t)v),		/* 3 */
	  "r" ((uint32_t)(v >> 32)));	/* 4 */
	return (res);
}

static __inline int
atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchg8b %1 ;		"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+A" (expect)			/* 2 */
	: "b" ((uint32_t)src),		/* 3 */
	  "c" ((uint32_t)(src >> 32))	/* 4 */
	: "memory", "cc");
	return (res);
}

static __inline int
atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	cmpxchg8b %1 ;		"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+A" (*expect)		/* 2 */
	: "b" ((uint32_t)src),		/* 3 */
	  "c" ((uint32_t)(src >> 32))	/* 4 */
	: "memory", "cc");
	return (res);
}

static __inline uint64_t
atomic_load_acq_64_i586(volatile uint64_t *p)
{
	uint64_t res;

	__asm __volatile(
	"	movl	%%ebx,%%eax ;	"
	"	movl	%%ecx,%%edx ;	"
	"	" MPLOCKED "		"
	"	cmpxchg8b %1"
	: "=&A" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: : "memory", "cc");
	return (res);
}

static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
}

static __inline uint64_t
atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
{

	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	" MPLOCKED "		"
	"	cmpxchg8b %0 ;		"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
	return (v);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_cmpset_64_i386(dst, expect, src));
	else
		return (atomic_cmpset_64_i586(dst, expect, src));
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_fcmpset_64_i386(dst, expect, src));
	else
		return (atomic_fcmpset_64_i586(dst, expect, src));
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_load_acq_64_i386(p));
	else
		return (atomic_load_acq_64_i586(p));
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		atomic_store_rel_64_i386(p, v);
	else
		atomic_store_rel_64_i586(p, v);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{

	if ((cpu_feature & CPUID_CX8) == 0)
		return (atomic_swap_64_i386(p, v));
	else
		return (atomic_swap_64_i586(p, v));
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{

	for (;;) {
		uint64_t t = *p;
		if (atomic_cmpset_64(p, t, t + v))
			return (t);
	}
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t t;

	for (;;) {
		t = *p;
		if (atomic_cmpset_64(p, t, t + v))
			break;
	}
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t t;

	for (;;) {
		t = *p;
		if (atomic_cmpset_64(p, t, t - v))
			break;
	}
}

#endif /* _KERNEL */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);

#define	ATOMIC_LOADSTORE(TYPE)				\
	ATOMIC_LOAD(TYPE);				\
	ATOMIC_STORE(TYPE)

ATOMIC_LOADSTORE(char);
ATOMIC_LOADSTORE(short);
ATOMIC_LOADSTORE(int);
ATOMIC_LOADSTORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
#undef ATOMIC_LOADSTORE

#ifndef WANT_FUNCTIONS

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{

	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
	    (u_int)src));
}

static __inline int
atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src)
{

	return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect,
	    (u_int)src));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_int((volatile u_int *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_int((volatile u_int *)p, v));
}

/*
 * Read the current value and store a new value in the destination.
 * The xchg instruction is implicitly locked when used with a memory
 * operand, so no explicit lock prefix is needed here.
 */
#ifdef __GNUCLIKE_ASM

static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_swap_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
}

#else /* !__GNUCLIKE_ASM */

u_int	atomic_swap_int(volatile u_int *p, u_int v);
u_long	atomic_swap_long(volatile u_long *p, u_long v);

#endif /* __GNUCLIKE_ASM */
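
/*
 * Swap alone is enough to build a minimal test-and-set spinlock
 * (sketch only; lk is a hypothetical lock word, and a production lock
 * would also spin read-only before retrying the xchg):
 *
 *	while (atomic_swap_int(&lk, 1) != 0)
 *		continue;		(spin until we stored the 1)
 *	...critical section...
 *	atomic_store_rel_int(&lk, 0);	(release)
 */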

#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char
#define	atomic_cmpset_acq_char		atomic_cmpset_char
#define	atomic_cmpset_rel_char		atomic_cmpset_char
#define	atomic_fcmpset_acq_char		atomic_fcmpset_char
#define	atomic_fcmpset_rel_char		atomic_fcmpset_char

#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short
#define	atomic_cmpset_acq_short		atomic_cmpset_short
#define	atomic_cmpset_rel_short		atomic_cmpset_short
#define	atomic_fcmpset_acq_short	atomic_fcmpset_short
#define	atomic_fcmpset_rel_short	atomic_fcmpset_short

#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long

#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char
#define	atomic_cmpset_8		atomic_cmpset_char
#define	atomic_cmpset_acq_8	atomic_cmpset_acq_char
#define	atomic_cmpset_rel_8	atomic_cmpset_rel_char
#define	atomic_fcmpset_8	atomic_fcmpset_char
#define	atomic_fcmpset_acq_8	atomic_fcmpset_acq_char
#define	atomic_fcmpset_rel_8	atomic_fcmpset_rel_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short
#define	atomic_cmpset_16	atomic_cmpset_short
#define	atomic_cmpset_acq_16	atomic_cmpset_acq_short
#define	atomic_cmpset_rel_16	atomic_cmpset_rel_short
#define	atomic_fcmpset_16	atomic_fcmpset_short
#define	atomic_fcmpset_acq_16	atomic_fcmpset_acq_short
#define	atomic_fcmpset_rel_16	atomic_fcmpset_rel_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int
#define	atomic_testandclear_32	atomic_testandclear_int

/* Operations on 64-bit quad words. */
#define	atomic_cmpset_acq_64 atomic_cmpset_64
#define	atomic_cmpset_rel_64 atomic_cmpset_64
#define	atomic_fcmpset_acq_64 atomic_fcmpset_64
#define	atomic_fcmpset_rel_64 atomic_fcmpset_64
#define	atomic_fetchadd_acq_64	atomic_fetchadd_64
#define	atomic_fetchadd_rel_64	atomic_fetchadd_64
#define	atomic_add_acq_64 atomic_add_64
#define	atomic_add_rel_64 atomic_add_64
#define	atomic_subtract_acq_64 atomic_subtract_64
#define	atomic_subtract_rel_64 atomic_subtract_64

/* Operations on pointers. */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))

#endif /* !WANT_FUNCTIONS */

#if defined(_KERNEL)
#define	mb()	__mbk()
#define	wmb()	__mbk()
#define	rmb()	__mbk()
#else
#define	mb()	__mbu()
#define	wmb()	__mbu()
#define	rmb()	__mbu()
#endif

#endif /* !_MACHINE_ATOMIC_H_ */