/*
 * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

/* OSAtomic.h library native implementations. */

	.text
	.align	2, 0x90

// This is a regparm(3) subroutine used by:

// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
// int32_t OSAtomicAnd32( int32_t mask, int32_t *value);
// int32_t OSAtomicOr32( int32_t mask, int32_t *value);
// int32_t OSAtomicXor32( int32_t mask, int32_t *value);

// It assumes old -> %eax, new -> %edx, value -> %ecx
// on success: returns with ZF set
// on failure: returns with *value in %eax, ZF clear

// The first word of the routine contains the address of the first instruction,
// so callers can pass parameters in registers and use an absolute indirect call:

// 	call *_COMPARE_AND_SWAP32

//	TODO: move the .long onto a separate page to reduce icache pollution (?)
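
// Illustrative sketch only (an assumption, not part of the commpage): a
// hypothetical user-space stub for OSAtomicCompareAndSwap32() could load the
// regparm(3) arguments described above, call indirectly through the word at the
// routine's commpage address, and convert ZF into the bool result:
//
//	_my_CompareAndSwap32_stub:			// hypothetical caller
//		movl	4(%esp),%eax			// old
//		movl	8(%esp),%edx			// new
//		movl	12(%esp),%ecx			// ptr to value
//		call	*_COMM_PAGE_COMPARE_AND_SWAP32	// indirect through the .long below
//		sete	%al				// ZF set -> swap succeeded
//		movzbl	%al,%eax			// widen bool to the int return value
//		ret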

Lcompare_and_swap32_mp:
.long	_COMM_PAGE_COMPARE_AND_SWAP32+4
	lock
	cmpxchgl  %edx, (%ecx)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)

Lcompare_and_swap32_up:
.long	_COMM_PAGE_COMPARE_AND_SWAP32+4
	cmpxchgl %edx, (%ecx)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)

// This is a subroutine used by:
// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);

// It assumes old -> %eax/%edx, new -> %ebx/%ecx, value -> %esi
// on success: returns with ZF set
// on failure: returns with *value in %eax/%edx, ZF clear
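
// Illustrative sketch only (an assumption, not part of the commpage): a caller
// must load the 64-bit operands into the register pairs cmpxchg8b expects
// (%edx:%eax = old, %ecx:%ebx = new) before calling through the commpage:
//
//	_my_CompareAndSwap64_stub:			// hypothetical caller
//		pushl	%ebx
//		pushl	%esi
//		movl	12(%esp),%eax			// old, low half
//		movl	16(%esp),%edx			// old, high half
//		movl	20(%esp),%ebx			// new, low half
//		movl	24(%esp),%ecx			// new, high half
//		movl	28(%esp),%esi			// ptr to value
//		call	*_COMM_PAGE_COMPARE_AND_SWAP64
//		sete	%al				// ZF set -> swap succeeded
//		movzbl	%al,%eax
//		popl	%esi
//		popl	%ebx
//		ret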

Lcompare_and_swap64_mp:
.long	_COMM_PAGE_COMPARE_AND_SWAP64+4
	lock
	cmpxchg8b (%esi)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)

Lcompare_and_swap64_up:
.long	_COMM_PAGE_COMPARE_AND_SWAP64+4
	cmpxchg8b (%esi)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)

// This is a subroutine used by:
// bool OSAtomicTestAndSet( uint32_t n, void *value );
// It assumes n -> %eax, value -> %edx

// Returns: old value of bit in CF
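
// Illustrative sketch only (an assumption, not part of the commpage): a caller
// converts the CF result into a bool return value, for example:
//
//		// ... load the bit number into %eax and the pointer into %edx ...
//		call	*_COMM_PAGE_BTS
//		setc	%al				// CF holds the old value of the bit
//		movzbl	%al,%eax			// widen to the bool return value
//		ret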

Lbit_test_and_set_mp:
.long	_COMM_PAGE_BTS+4
	lock
	btsl %eax, (%edx)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP)

Lbit_test_and_set_up:
.long	_COMM_PAGE_BTS+4
	btsl %eax, (%edx)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0)

// This is a subroutine used by:
// bool OSAtomicTestAndClear( uint32_t n, void *value );
// It assumes n -> %eax, value -> %edx

// Returns: old value of bit in CF

Lbit_test_and_clear_mp:
.long	_COMM_PAGE_BTC+4
	lock
	btrl %eax, (%edx)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP)

Lbit_test_and_clear_up:
.long	_COMM_PAGE_BTC+4
	btrl %eax, (%edx)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0)

// This is a subroutine used by:
// int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
// It assumes amt -> %eax, value -> %edx

// Returns: old value in %eax
// NB: OSAtomicAdd32 returns the new value, so clients will add amt to %eax
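
// Illustrative sketch only (an assumption, not part of the commpage): since
// xadd leaves the old value in %eax, a hypothetical caller that needs the new
// value (as OSAtomicAdd32 does) re-adds amt after the call, for example:
//
//		movl	4(%esp),%eax			// amt
//		movl	8(%esp),%edx			// ptr to value
//		call	*_COMM_PAGE_ATOMIC_ADD32
//		addl	4(%esp),%eax			// old + amt = new value
//		ret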

Latomic_add32_mp:
.long	_COMM_PAGE_ATOMIC_ADD32+4
	lock
	xaddl	%eax, (%edx)
	ret

    COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP)

Latomic_add32_up:
.long	_COMM_PAGE_ATOMIC_ADD32+4
	xaddl	%eax, (%edx)
	ret

    COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0)


// OSMemoryBarrier()
// These are used in both 32-bit and 64-bit mode.  We use a fence even on UP
// machines, so this function can be used with nontemporal stores.
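
// Illustrative sketch only (an assumption, not part of the commpage): this is
// why the barrier is a real fence even on UP -- a hypothetical caller using
// weakly-ordered nontemporal stores still needs it, for example:
//
//		movnti	%eax,(%edx)			// nontemporal store (SSE2)
//		movl	$(_COMM_PAGE_MEMORY_BARRIER),%ecx
//		call	*%ecx				// order it with subsequent accesses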

Lmemory_barrier:
	lock
	addl	$0,(%esp)
	ret

    COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2);

Lmemory_barrier_sse2:
	mfence
	ret

    COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0);


/*
 *	typedef	volatile struct {
 *		void	*opaque1;  <-- ptr to 1st queue element or null
 *		long	 opaque2;  <-- generation count
 *	} OSQueueHead;
 *
 * void  OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset);
 */

LAtomicEnqueue:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	movl	16(%esp),%edi	// %edi == ptr to list head
	movl	20(%esp),%ebx	// %ebx == new
	movl	24(%esp),%esi	// %esi == offset
	movl	(%edi),%eax	// %eax == ptr to 1st element in Q
	movl	4(%edi),%edx	// %edx == current generation count
1:
	movl	%eax,(%ebx,%esi)// link to old list head from new element
	movl	%edx,%ecx
	incl	%ecx		// increment generation count
	lock			// always lock for now...
	cmpxchg8b (%edi)	// ...push on new element
	jnz	1b
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

    COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0)


/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */

LAtomicDequeue:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	movl	16(%esp),%edi	// %edi == ptr to list head
	movl	20(%esp),%esi	// %esi == offset
	movl	(%edi),%eax	// %eax == ptr to 1st element in Q
	movl	4(%edi),%edx	// %edx == current generation count
1:
	testl	%eax,%eax	// list empty?
	jz	2f		// yes
	movl	(%eax,%esi),%ebx // point to 2nd in Q
	movl	%edx,%ecx
	incl	%ecx		// increment generation count
	lock			// always lock for now...
	cmpxchg8b (%edi)	// ...pop off 1st element
	jnz	1b
2:
	popl	%ebx
	popl	%esi
	popl	%edi
	ret			// ptr to 1st element in Q still in %eax

    COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0)



/************************* x86_64 versions follow **************************/


// This is a subroutine used by:

// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
// int32_t OSAtomicAnd32( int32_t mask, int32_t *value);
// int32_t OSAtomicOr32( int32_t mask, int32_t *value);
// int32_t OSAtomicXor32( int32_t mask, int32_t *value);

// It assumes: old -> %rdi  (i.e., it follows the ABI parameter conventions)
//             new -> %rsi
//             value -> %rdx
// on success: returns with ZF set
// on failure: returns with *value in %eax, ZF clear
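
// Illustrative sketch only (an assumption, not part of the commpage): in 64-bit
// mode the arguments are already in the ABI registers, so a hypothetical stub
// only needs to call the routine's commpage address and convert ZF to a bool:
//
//		movq	$(_COMM_PAGE_COMPARE_AND_SWAP32),%rcx
//		call	*%rcx
//		sete	%al				// ZF set -> swap succeeded
//		movzbl	%al,%eax
//		ret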

	.code64
Lcompare_and_swap32_mp_64:
	movl	%edi,%eax			// put old value where "cmpxchg" wants it
	lock
	cmpxchgl  %esi, (%rdx)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)

	.code64
Lcompare_and_swap32_up_64:
	movl	%edi,%eax			// put old value where "cmpxchg" wants it
	cmpxchgl  %esi, (%rdx)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)

// This is a subroutine used by:
// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);

// It assumes: old -> %rdi  (i.e., it follows the ABI parameter conventions)
//             new -> %rsi
//             value -> %rdx
// on success: returns with ZF set
// on failure: returns with *value in %rax, ZF clear

	.code64
Lcompare_and_swap64_mp_64:
	movq	%rdi,%rax			// put old value where "cmpxchg" wants it
	lock
	cmpxchgq  %rsi, (%rdx)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)

	.code64
Lcompare_and_swap64_up_64:
	movq	%rdi,%rax			// put old value where "cmpxchg" wants it
	cmpxchgq  %rsi, (%rdx)
	ret

    COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)

// This is a subroutine used by:
// bool OSAtomicTestAndSet( uint32_t n, void *value );
// It is called with standard register conventions:
//			n = %rdi
//			value = %rsi
// Returns: old value of bit in CF

	.code64
Lbit_test_and_set_mp_64:
	lock
	btsl %edi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP)

	.code64
Lbit_test_and_set_up_64:
	btsl %edi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0)

// This is a subroutine used by:
// bool OSAtomicTestAndClear( uint32_t n, void *value );
// It is called with standard register conventions:
//			n = %rdi
//			value = %rsi
// Returns: old value of bit in CF

	.code64
Lbit_test_and_clear_mp_64:
	lock
	btrl %edi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP)

	.code64
Lbit_test_and_clear_up_64:
	btrl %edi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0)

// This is a subroutine used by:
// int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
// It is called with standard register conventions:
//			amt = %rdi
//			value = %rsi
// Returns: old value in %edi
// NB: OSAtomicAdd32 returns the new value, so clients will add amt to %edi
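
// Illustrative sketch only (an assumption, not part of the commpage): a
// hypothetical 64-bit stub could save amt, call the commpage routine, and then
// form the new value from the old value left in %edi:
//
//		movl	%edi,%eax			// save amt
//		movq	$(_COMM_PAGE_ATOMIC_ADD32),%rcx
//		call	*%rcx				// leaves old value in %edi
//		addl	%edi,%eax			// old + amt = new value
//		ret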

	.code64
Latomic_add32_mp_64:
	lock
	xaddl	%edi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP)

	.code64
Latomic_add32_up_64:
	xaddl	%edi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0)

// This is a subroutine used by:
// int64_t OSAtomicAdd64( int64_t amt, int64_t *value );
// It is called with standard register conventions:
//			amt = %rdi
//			value = %rsi
// Returns: old value in %rdi
// NB: OSAtomicAdd64 returns the new value, so clients will add amt to %rdi

	.code64
Latomic_add64_mp_64:
	lock
	xaddq	%rdi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP)

	.code64
Latomic_add64_up_64:
	xaddq	%rdi, (%rsi)
	ret

    COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0)


/*
 *	typedef	volatile struct {
 *		void	*opaque1;  <-- ptr to 1st queue element or null
 *		long	 opaque2;  <-- generation count
 *	} OSQueueHead;
 *
 * void  OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset);
 */

	.code64
LAtomicEnqueue_64:		// %rdi == list head, %rsi == new, %rdx == offset
	pushq	%rbx
	movq	%rsi,%rbx	// %rbx == new
	movq	%rdx,%rsi	// %rsi == offset
	movq	(%rdi),%rax	// %rax == ptr to 1st element in Q
	movq	8(%rdi),%rdx	// %rdx == current generation count
1:
	movq	%rax,(%rbx,%rsi)// link to old list head from new element
	movq	%rdx,%rcx
	incq	%rcx		// increment generation count
	lock			// always lock for now...
	cmpxchg16b (%rdi)	// ...push on new element
	jnz	1b
	popq	%rbx
	ret

    COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0)


/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */

	.code64
LAtomicDequeue_64:		// %rdi == list head, %rsi == offset
	pushq	%rbx
	movq	(%rdi),%rax	// %rax == ptr to 1st element in Q
	movq	8(%rdi),%rdx	// %rdx == current generation count
1:
	testq	%rax,%rax	// list empty?
	jz	2f		// yes
	movq	(%rax,%rsi),%rbx // point to 2nd in Q
	movq	%rdx,%rcx
	incq	%rcx		// increment generation count
	lock			// always lock for now...
	cmpxchg16b (%rdi)	// ...pop off 1st element
	jnz	1b
2:
	popq	%rbx
	ret			// ptr to 1st element in Q still in %rax

    COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0)