1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1989 Carnegie-Mellon University
34 * All rights reserved.  The CMU software License Agreement specifies
35 * the terms and conditions for use and redistribution.
36 */
37
38#include <mach_rt.h>
39#include <platforms.h>
40#include <mach_ldebug.h>
41#include <i386/asm.h>
42#include <i386/eflags.h>
43#include <i386/trap.h>
44#include <config_dtrace.h>
45#include <i386/mp.h>
46
47#include "assym.s"
48
49#define	PAUSE		rep; nop
50
51#include <i386/pal_lock_asm.h>
52
53/*
54 *	When performance isn't the only concern, it's
55 *	nice to build stack frames...
56 */
57#define	BUILD_STACK_FRAMES   (GPROF)
58
59#if	BUILD_STACK_FRAMES
60
61/* Stack-frame-relative: */
62#define	L_PC		B_PC
63#define	L_ARG0		B_ARG0
64#define	L_ARG1		B_ARG1
65
66#define LEAF_ENTRY(name)	\
67	Entry(name);		\
68	FRAME;			\
69	MCOUNT
70
71#define LEAF_ENTRY2(n1,n2)	\
72	Entry(n1);		\
73	Entry(n2);		\
74	FRAME;			\
75	MCOUNT
76
77#define LEAF_RET		\
78	EMARF;			\
79	ret
80
81#else	/* BUILD_STACK_FRAMES */
82
83/* Stack-pointer-relative: */
84#define	L_PC		S_PC
85#define	L_ARG0		S_ARG0
86#define	L_ARG1		S_ARG1
87
88#define LEAF_ENTRY(name)	\
89	Entry(name)
90
91#define LEAF_ENTRY2(n1,n2)	\
92	Entry(n1);		\
93	Entry(n2)
94
95#define LEAF_RET		\
96	ret
97
98#endif	/* BUILD_STACK_FRAMES */
99
100
101/* Non-leaf routines always have a stack frame: */
102
103#define NONLEAF_ENTRY(name)	\
104	Entry(name);		\
105	FRAME;			\
106	MCOUNT
107
108#define NONLEAF_ENTRY2(n1,n2)	\
109	Entry(n1);		\
110	Entry(n2);		\
111	FRAME;			\
112	MCOUNT
113
114#define NONLEAF_RET		\
115	EMARF;			\
116	ret
117
118
119/* For x86_64, the varargs ABI requires that %al indicate
120 * how many SSE register contain arguments. In our case, 0 */
121#if __i386__
122#define ALIGN_STACK()		subl $8, %esp; andl	$0xFFFFFFF0, %esp ;
123#define LOAD_STRING_ARG0(label)	movl $##label, (%esp) ;
124#define LOAD_ARG1(x)		mov  x, 4(%esp)	;
125#define LOAD_PTR_ARG1(x)	mov  x, 4(%esp)	;
126#define CALL_PANIC()		call EXT(panic) ;
127#else
128#define ALIGN_STACK() 		and  $0xFFFFFFFFFFFFFFF0, %rsp ;
129#define LOAD_STRING_ARG0(label)	leaq label(%rip), %rdi ;
130#define LOAD_ARG1(x)		mov x, %esi ;
131#define LOAD_PTR_ARG1(x)	mov x, %rsi ;
132#define CALL_PANIC()		xorb %al,%al ; call EXT(panic) ;
133#endif
134
135#define	CHECK_UNLOCK(current, owner)				\
136	cmp	current, owner				;	\
137	je	1f					;	\
138	ALIGN_STACK()					;	\
139	LOAD_STRING_ARG0(2f)				;	\
140	CALL_PANIC()					;	\
141	hlt						;	\
142	.data						;	\
1432:	String	"Mutex unlock attempted from non-owner thread";	\
144	.text						;	\
1451:
146
147#if	MACH_LDEBUG
148/*
149 *  Routines for general lock debugging.
150 */
151
152/*
153 * Checks for expected lock types and calls "panic" on
154 * mismatch.  Detects calls to Mutex functions with
155 * type simplelock and vice versa.
156 */
157#define	CHECK_MUTEX_TYPE()					\
158	cmpl	$ MUTEX_TAG,M_TYPE			;	\
159	je	1f					;	\
160	ALIGN_STACK()					;	\
161	LOAD_STRING_ARG0(2f)				;	\
162	CALL_PANIC()					;	\
163	hlt						;	\
164	.data						;	\
1652:	String	"not a mutex!"				;	\
166	.text						;	\
1671:
168
169/*
170 * If one or more simplelocks are currently held by a thread,
171 * an attempt to acquire a mutex will cause this check to fail
172 * (since a mutex lock may context switch, holding a simplelock
173 * is not a good thing).
174 */
175#if	MACH_RT
176#define CHECK_PREEMPTION_LEVEL()				\
177	cmpl	$0,%gs:CPU_HIBERNATE			;	\
178	jne	1f					;	\
179	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		;	\
180	je	1f					;	\
181	ALIGN_STACK()					;	\
182	movl	%gs:CPU_PREEMPTION_LEVEL, %eax		;	\
183	LOAD_ARG1(%eax)					;	\
184	LOAD_STRING_ARG0(2f)				;	\
185	CALL_PANIC()					;	\
186	hlt						;	\
187	.data						;	\
1882:	String	"preemption_level(%d) != 0!"		;	\
189	.text						;	\
1901:
191#else	/* MACH_RT */
192#define	CHECK_PREEMPTION_LEVEL()
193#endif	/* MACH_RT */
194
195#define	CHECK_MYLOCK(current, owner)				\
196	cmp	current, owner				;	\
197	jne	1f					;	\
198	ALIGN_STACK()					;	\
199	LOAD_STRING_ARG0(2f)				;	\
200	CALL_PANIC()					;	\
201	hlt						;	\
202	.data						;	\
2032:	String	"Attempt to recursively lock a non-recursive lock";	\
204	.text						;	\
2051:
206
207#else	/* MACH_LDEBUG */
208#define	CHECK_MUTEX_TYPE()
209#define CHECK_PREEMPTION_LEVEL()
210#define	CHECK_MYLOCK(thd)
211#endif	/* MACH_LDEBUG */
212
213#define PREEMPTION_DISABLE				\
214	incl	%gs:CPU_PREEMPTION_LEVEL
215
216#define	PREEMPTION_LEVEL_DEBUG 1
217#if	PREEMPTION_LEVEL_DEBUG
218#define	PREEMPTION_ENABLE				\
219	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
220	js	17f				;	\
221	jnz	19f				;	\
222	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
223	jz	19f				;	\
224	PUSHF					;	\
225	testl	$EFL_IF, S_PC			;	\
226	jz	18f				;	\
227	POPF					;	\
228	int	$(T_PREEMPT)			;	\
229	jmp	19f				;	\
23017:							\
231	call	_preemption_underflow_panic	;	\
23218:							\
233	POPF					;	\
23419:
235#else
236#define	PREEMPTION_ENABLE				\
237	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
238	jnz	19f				;	\
239	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
240	jz	19f				;	\
241	PUSHF					;	\
242	testl	$EFL_IF, S_PC			;	\
243	jz	18f				;	\
244	POPF					;	\
245	int	$(T_PREEMPT)			;	\
246	jmp	19f				;	\
24718:							\
248	POPF					;	\
24919:
250#endif
251
252
253#if	CONFIG_DTRACE
254
255       .globl  _lockstat_probe
256       .globl  _lockstat_probemap
257
258/*
259 * LOCKSTAT_LABEL creates a dtrace symbol which contains
260 * a pointer into the lock code function body. At that
261 * point is a "ret" instruction that can be patched into
262 * a "nop"
263 */
264
265#if defined(__i386__)
266
267#define	LOCKSTAT_LABEL(lab) \
268	.data				;\
269	.globl	lab			;\
270	lab:				;\
271	.long 9f			;\
272	.text				;\
273	9:
274
275#define	LOCKSTAT_RECORD(id, lck) \
276	push	%ebp					;	\
277	mov	%esp,%ebp				;	\
278	sub	$0x38,%esp	/* size of dtrace_probe args */ ; \
279	movl	_lockstat_probemap + (id * 4),%eax	;	\
280	test	%eax,%eax				;	\
281	je	9f					;	\
282	movl	$0,36(%esp)				;	\
283	movl	$0,40(%esp)				;	\
284	movl	$0,28(%esp)				;	\
285	movl	$0,32(%esp)				;	\
286	movl	$0,20(%esp)				;	\
287	movl	$0,24(%esp)				;	\
288	movl	$0,12(%esp)				;	\
289	movl	$0,16(%esp)				;	\
290	movl	lck,4(%esp)	/* copy lock pointer to arg 1 */ ; \
291	movl	$0,8(%esp)				;	\
292	movl	%eax,(%esp) 				; 	\
293	call	*_lockstat_probe			;	\
2949:	leave
295	/* ret - left to subsequent code, e.g. return values */
296
297#elif defined(__x86_64__)
298#define        LOCKSTAT_LABEL(lab) \
299       .data                                       ;\
300       .globl  lab                                 ;\
301       lab:                                        ;\
302       .quad 9f                                    ;\
303       .text                                       ;\
304       9:
305
306#define LOCKSTAT_RECORD(id, lck) \
307       push    %rbp                                ;       \
308       mov     %rsp,%rbp                           ;       \
309       movl    _lockstat_probemap + (id * 4)(%rip),%eax ;  \
310       test    %eax,%eax                           ;       \
311       je              9f                          ;       \
312       mov             lck, %rsi                   ;       \
313       mov             %rax, %rdi                  ;       \
314       mov             $0, %rdx                    ;       \
315       mov             $0, %rcx                    ;       \
316       mov             $0, %r8                     ;       \
317       mov             $0, %r9                     ;       \
318       call    *_lockstat_probe(%rip)              ;       \
3199:	leave
320	/* ret - left to subsequent code, e.g. return values */
321#else
322#error Unsupported architecture
323#endif
324#endif /* CONFIG_DTRACE */
325
326/*
327 * For most routines, the hw_lock_t pointer is loaded into a
328 * register initially, and then either a byte or register-sized
329 * word is loaded/stored to the pointer
330 */
331
332#if defined(__i386__)
333#define	HW_LOCK_REGISTER	%edx
334#define	LOAD_HW_LOCK_REGISTER mov L_ARG0, HW_LOCK_REGISTER
335#define	HW_LOCK_THREAD_REGISTER	%ecx
336#define	LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
337#define	HW_LOCK_MOV_WORD	movl
338#define	HW_LOCK_EXAM_REGISTER	%eax
339#elif defined(__x86_64__)
340#define	HW_LOCK_REGISTER	%rdi
341#define	LOAD_HW_LOCK_REGISTER
342#define	HW_LOCK_THREAD_REGISTER	%rcx
343#define	LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
344#define	HW_LOCK_MOV_WORD	movq
345#define	HW_LOCK_EXAM_REGISTER	%rax
346#else
347#error Unsupported architecture
348#endif
349
350/*
351 *	void hw_lock_init(hw_lock_t)
352 *
353 *	Initialize a hardware lock.
354 */
355LEAF_ENTRY(hw_lock_init)
356	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
357	HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)		/* clear the lock */
358	LEAF_RET
359
360
361/*
362 *	void hw_lock_byte_init(volatile uint8_t *)
363 *
364 *	Initialize a hardware byte lock.
365 */
366LEAF_ENTRY(hw_lock_byte_init)
367	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
368	movb $0, (HW_LOCK_REGISTER)		/* clear the lock */
369	LEAF_RET
370
371/*
372 *	void hw_lock_lock(hw_lock_t)
373 *
374 *	Acquire lock, spinning until it becomes available.
375 *	MACH_RT:  also return with preemption disabled.
376 */
377LEAF_ENTRY(hw_lock_lock)
378	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
379	LOAD_HW_LOCK_THREAD_REGISTER	/* get thread pointer */
380
381	PREEMPTION_DISABLE
3821:
383	mov	(HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
384	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER		/* lock locked? */
385	jne	3f			/* branch if so */
386	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
387	jne	3f
388	movl	$1,%eax			/* In case this was a timeout call */
389	LEAF_RET			/* if yes, then nothing left to do */
3903:
391	PAUSE				/* pause for hyper-threading */
392	jmp	1b			/* try again */
393
394/*
395 *	void	hw_lock_byte_lock(uint8_t *lock_byte)
396 *
397 *	Acquire byte sized lock operand, spinning until it becomes available.
398 *	MACH_RT:  also return with preemption disabled.
399 */
400
401LEAF_ENTRY(hw_lock_byte_lock)
402	LOAD_HW_LOCK_REGISTER		/* Load lock pointer */
403	PREEMPTION_DISABLE
404	movl	$1, %ecx		/* Set lock value */
4051:
406	movb	(HW_LOCK_REGISTER), %al		/* Load byte at address */
407	testb	%al,%al			/* lock locked? */
408	jne	3f			/* branch if so */
409	lock; cmpxchg	%cl,(HW_LOCK_REGISTER)	/* attempt atomic compare exchange */
410	jne	3f
411	LEAF_RET			/* if yes, then nothing left to do */
4123:
413	PAUSE				/* pause for hyper-threading */
414	jmp	1b			/* try again */
415
416/*
417 *	unsigned int hw_lock_to(hw_lock_t, unsigned int)
418 *
419 *	Acquire lock, spinning until it becomes available or timeout.
420 *	MACH_RT:  also return with preemption disabled.
421 */
422LEAF_ENTRY(hw_lock_to)
4231:
424	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
425	LOAD_HW_LOCK_THREAD_REGISTER
426
427	/*
428	 * Attempt to grab the lock immediately
429	 * - fastpath without timeout nonsense.
430	 */
431	PREEMPTION_DISABLE
432
433	mov	(HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
434	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER		/* lock locked? */
435	jne	2f			/* branch if so */
436	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
437	jne	2f			/* branch on failure */
438	movl	$1,%eax
439	LEAF_RET
440
4412:
442#define	INNER_LOOP_COUNT	1000
443	/*
444	 * Failed to get the lock so set the timeout
445	 * and then spin re-checking the lock but pausing
446	 * every so many (INNER_LOOP_COUNT) spins to check for timeout.
447	 */
448#if __i386__
449	movl	L_ARG1,%ecx		/* fetch timeout */
450	push	%edi
451	push	%ebx
452	mov	%edx,%edi
453
454	lfence
455	rdtsc				/* read cyclecount into %edx:%eax */
456	addl	%ecx,%eax		/* fetch and timeout */
457	adcl	$0,%edx			/* add carry */
458	mov	%edx,%ecx
459	mov	%eax,%ebx		/* %ecx:%ebx is the timeout expiry */
460	mov	%edi, %edx		/* load lock back into %edx */
461#else
462	push	%r9
463	lfence
464	rdtsc				/* read cyclecount into %edx:%eax */
465	shlq	$32, %rdx
466	orq	%rdx, %rax		/* load 64-bit quantity into %rax */
467	addq	%rax, %rsi		/* %rsi is the timeout expiry */
468#endif
469
4704:
471	/*
472	 * The inner-loop spin to look for the lock being freed.
473	 */
474#if __i386__
475	mov	$(INNER_LOOP_COUNT),%edi
476#else
477	mov	$(INNER_LOOP_COUNT),%r9
478#endif
4795:
480	PAUSE				/* pause for hyper-threading */
481	mov	(HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER		/* spin checking lock value in cache */
482	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
483	je	6f			/* zero => unlocked, try to grab it */
484#if __i386__
485	decl	%edi			/* decrement inner loop count */
486#else
487	decq	%r9			/* decrement inner loop count */
488#endif
489	jnz	5b			/* time to check for timeout? */
490
491	/*
492	 * Here after spinning INNER_LOOP_COUNT times, check for timeout
493	 */
494#if __i386__
495	mov	%edx,%edi		/* Save %edx */
496	lfence
497	rdtsc				/* cyclecount into %edx:%eax */
498	xchg	%edx,%edi		/* cyclecount into %edi:%eax */
499	cmpl	%ecx,%edi		/* compare high-order 32-bits */
500	jb	4b			/* continue spinning if less, or */
501	cmpl	%ebx,%eax		/* compare low-order 32-bits */
502	jb	4b			/* continue if less, else bail */
503	xor	%eax,%eax		/* with 0 return value */
504	pop	%ebx
505	pop	%edi
506#else
507	lfence
508	rdtsc				/* cyclecount into %edx:%eax */
509	shlq	$32, %rdx
510	orq	%rdx, %rax		/* load 64-bit quantity into %rax */
511	cmpq	%rsi, %rax		/* compare to timeout */
512	jb	4b			/* continue spinning if less, or */
513	xor	%rax,%rax		/* with 0 return value */
514	pop	%r9
515#endif
516	LEAF_RET
517
5186:
519	/*
520	 * Here to try to grab the lock that now appears to be free
521	 * after contention.
522	 */
523	LOAD_HW_LOCK_THREAD_REGISTER
524	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
525	jne	4b			/* no - spin again */
526	movl	$1,%eax			/* yes */
527#if __i386__
528	pop	%ebx
529	pop	%edi
530#else
531	pop	%r9
532#endif
533	LEAF_RET
534
535/*
536 *	void hw_lock_unlock(hw_lock_t)
537 *
538 *	Unconditionally release lock.
539 *	MACH_RT:  release preemption level.
540 */
541LEAF_ENTRY(hw_lock_unlock)
542	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
543	HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)		/* clear the lock */
544	PREEMPTION_ENABLE
545	LEAF_RET
546
547/*
548 *	void hw_lock_byte_unlock(uint8_t *lock_byte)
549 *
550 *	Unconditionally release byte sized lock operand.
551 *	MACH_RT:  release preemption level.
552 */
553
554LEAF_ENTRY(hw_lock_byte_unlock)
555	LOAD_HW_LOCK_REGISTER		/* Load lock pointer */
556	movb $0, (HW_LOCK_REGISTER)		/* Clear the lock byte */
557	PREEMPTION_ENABLE
558	LEAF_RET
559
560/*
561 *	unsigned int hw_lock_try(hw_lock_t)
562 *	MACH_RT:  returns with preemption disabled on success.
563 */
564LEAF_ENTRY(hw_lock_try)
565	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
566	LOAD_HW_LOCK_THREAD_REGISTER
567	PREEMPTION_DISABLE
568
569	mov	(HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER
570	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
571	jne	1f
572	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
573	jne	1f
574
575	movl	$1,%eax			/* success */
576	LEAF_RET
577
5781:
579	PREEMPTION_ENABLE		/* failure:  release preemption... */
580	xorl	%eax,%eax		/* ...and return failure */
581	LEAF_RET
582
583/*
584 *	unsigned int hw_lock_held(hw_lock_t)
585 *	MACH_RT:  doesn't change preemption state.
586 *	N.B.  Racy, of course.
587 */
588LEAF_ENTRY(hw_lock_held)
589	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
590	mov	(HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER		/* check lock value */
591	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
592	movl	$1,%ecx
593	cmovne	%ecx,%eax		/* 0 => unlocked, 1 => locked */
594	LEAF_RET
595
596
597/*
598 * Reader-writer lock fastpaths. These currently exist for the
599 * shared lock acquire, the exclusive lock acquire, the shared to
600 * exclusive upgrade and the release paths (where they reduce overhead
601 * considerably) -- these are by far the most frequently used routines
602 *
603 * The following should reflect the layout of the bitfield embedded within
604 * the lck_rw_t structure (see i386/locks.h).
605 */
606#define LCK_RW_INTERLOCK	(0x1 << 16)
607
608#define LCK_RW_PRIV_EXCL	(0x1 << 24)
609#define LCK_RW_WANT_UPGRADE	(0x2 << 24)
610#define LCK_RW_WANT_WRITE	(0x4 << 24)
611#define LCK_R_WAITING		(0x8 << 24)
612#define LCK_W_WAITING		(0x10 << 24)
613
614#define LCK_RW_SHARED_MASK	(0xffff)
615
616/*
617 * For most routines, the lck_rw_t pointer is loaded into a
618 * register initially, and the flags bitfield loaded into another
619 * register and examined
620 */
621
622#if defined(__i386__)
623#define	LCK_RW_REGISTER	%edx
624#define	LOAD_LCK_RW_REGISTER mov S_ARG0, LCK_RW_REGISTER
625#define	LCK_RW_FLAGS_REGISTER	%eax
626#define	LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
627#elif defined(__x86_64__)
628#define	LCK_RW_REGISTER	%rdi
629#define	LOAD_LCK_RW_REGISTER
630#define	LCK_RW_FLAGS_REGISTER	%eax
631#define	LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
632#else
633#error Unsupported architecture
634#endif
635
636#define	RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
637/*
638 *	void lck_rw_lock_shared(lck_rw_t *)
639 *
640 */
641Entry(lck_rw_lock_shared)
642	LOAD_LCK_RW_REGISTER
6431:
644	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield and interlock */
645	testl	$(RW_LOCK_SHARED_MASK), %eax	/* Eligible for fastpath? */
646	jne	3f
647
648	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
649	incl	%ecx				/* Increment reader refcount */
650	lock
651	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
652	jne	2f
653
654#if	CONFIG_DTRACE
655	/*
656	 * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_ACQUIRE
657	 * Implemented by swapping between return and no-op instructions.
658	 * See bsd/dev/dtrace/lockstat.c.
659	 */
660	LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
661	ret
662	/*
663	Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER
664	*/
665	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
666#endif
667	ret
6682:
669	PAUSE
670	jmp	1b
6713:
672	jmp	EXT(lck_rw_lock_shared_gen)
673
674
675
676#define	RW_TRY_LOCK_SHARED_MASK (LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
677/*
678 *	void lck_rw_try_lock_shared(lck_rw_t *)
679 *
680 */
681Entry(lck_rw_try_lock_shared)
682	LOAD_LCK_RW_REGISTER
6831:
684	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield and interlock */
685	testl	$(LCK_RW_INTERLOCK), %eax
686	jne	2f
687	testl	$(RW_TRY_LOCK_SHARED_MASK), %eax
688	jne	3f			/* lock is busy */
689
690	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
691	incl	%ecx				/* Increment reader refcount */
692	lock
693	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
694	jne	2f
695
696#if	CONFIG_DTRACE
697	movl	$1, %eax
698	/*
699	 * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
700	 * Implemented by swapping between return and no-op instructions.
701	 * See bsd/dev/dtrace/lockstat.c.
702	 */
703	LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
704	ret
705    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
706    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
707#endif
708	movl	$1, %eax			/* return TRUE */
709	ret
7102:
711	PAUSE
712	jmp	1b
7133:
714	xorl	%eax, %eax
715	ret
716
717
718#define	RW_LOCK_EXCLUSIVE_HELD	(LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)
719/*
720 *	int lck_rw_grab_shared(lck_rw_t *)
721 *
722 */
723Entry(lck_rw_grab_shared)
724	LOAD_LCK_RW_REGISTER
7251:
726	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield and interlock */
727	testl	$(LCK_RW_INTERLOCK), %eax
728	jne	5f
729	testl	$(RW_LOCK_EXCLUSIVE_HELD), %eax
730	jne	3f
7312:
732	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
733	incl	%ecx				/* Increment reader refcount */
734	lock
735	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
736	jne	4f
737
738	movl	$1, %eax			/* return success */
739	ret
7403:
741	testl	$(LCK_RW_SHARED_MASK), %eax
742	je	4f
743	testl	$(LCK_RW_PRIV_EXCL), %eax
744	je	2b
7454:
746	xorl	%eax, %eax			/* return failure */
747	ret
7485:
749	PAUSE
750	jmp	1b
751
752
753
754#define	RW_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | \
755	                        LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
756/*
757 *	void lck_rw_lock_exclusive(lck_rw_t*)
758 *
759 */
760Entry(lck_rw_lock_exclusive)
761	LOAD_LCK_RW_REGISTER
7621:
763	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and shared count */
764	testl	$(RW_LOCK_EXCLUSIVE_MASK), %eax		/* Eligible for fastpath? */
765	jne	3f					/* no, go slow */
766
767	movl	%eax, %ecx				/* original value in %eax for cmpxchgl */
768	orl	$(LCK_RW_WANT_WRITE), %ecx
769	lock
770	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
771	jne	2f
772
773#if	CONFIG_DTRACE
774	/*
775	 * Dtrace lockstat event: LS_LCK_RW_LOCK_EXCL_ACQUIRE
776	 * Implemented by swapping between return and no-op instructions.
777	 * See bsd/dev/dtrace/lockstat.c.
778	 */
779	LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
780	ret
781    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
782    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
783#endif
784	ret
7852:
786	PAUSE
787	jmp	1b
7883:
789	jmp	EXT(lck_rw_lock_exclusive_gen)
790
791
792
793#define	RW_TRY_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
794/*
795 *	void lck_rw_try_lock_exclusive(lck_rw_t *)
796 *
797 *		Tries to get a write lock.
798 *
799 *		Returns FALSE if the lock is not held on return.
800 */
801Entry(lck_rw_try_lock_exclusive)
802	LOAD_LCK_RW_REGISTER
8031:
804	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and shared count */
805	testl	$(LCK_RW_INTERLOCK), %eax
806	jne	2f
807	testl	$(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax
808	jne	3f					/* can't get it */
809
810	movl	%eax, %ecx				/* original value in %eax for cmpxchgl */
811	orl	$(LCK_RW_WANT_WRITE), %ecx
812	lock
813	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
814	jne	2f
815
816#if	CONFIG_DTRACE
817	movl	$1, %eax
818	/*
819	 * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
820	 * Implemented by swapping between return and no-op instructions.
821	 * See bsd/dev/dtrace/lockstat.c.
822	 */
823	LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
824	ret
825    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
826    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
827#endif
828	movl	$1, %eax			/* return TRUE */
829	ret
8302:
831	PAUSE
832	jmp	1b
8333:
834	xorl	%eax, %eax			/* return FALSE */
835	ret
836
837
838
839/*
840 *	void lck_rw_lock_shared_to_exclusive(lck_rw_t*)
841 *
842 *	fastpath can be taken if
843 *	the current rw_shared_count == 1
844 *	AND the interlock is clear
845 *	AND RW_WANT_UPGRADE is not set
846 *
847 *	note that RW_WANT_WRITE could be set, but will not
848 *	be indicative of an exclusive hold since we have
849 * 	a read count on the lock that we have not yet released
850 *	we can blow by that state since the lck_rw_lock_exclusive
851 * 	function will block until rw_shared_count == 0 and
852 * 	RW_WANT_UPGRADE is clear... it does this check behind
853 *	the interlock which we are also checking for
854 *
855 * 	to make the transition we must be able to atomically
856 *	set RW_WANT_UPGRADE and get rid of the read count we hold
857 */
858Entry(lck_rw_lock_shared_to_exclusive)
859	LOAD_LCK_RW_REGISTER
8601:
861	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and shared count */
862	testl	$(LCK_RW_INTERLOCK), %eax
863	jne	7f
864	testl	$(LCK_RW_WANT_UPGRADE), %eax
865	jne	2f
866
867	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
868	orl	$(LCK_RW_WANT_UPGRADE), %ecx	/* ask for WANT_UPGRADE */
869	decl	%ecx				/* and shed our read count */
870	lock
871	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
872	jne	7f
873						/* we now own the WANT_UPGRADE */
874	testl	$(LCK_RW_SHARED_MASK), %ecx	/* check to see if all of the readers are drained */
875	jne	8f				/* if not, we need to go wait */
876
877#if	CONFIG_DTRACE
878	movl	$1, %eax
879	/*
880	 * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
881	 * Implemented by swapping between return and no-op instructions.
882	 * See bsd/dev/dtrace/lockstat.c.
883	 */
884	LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
885	ret
886    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
887    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, LCK_RW_REGISTER)
888#endif
889	movl	$1, %eax			/* return success */
890	ret
891
8922:						/* someone else already holds WANT_UPGRADE */
893	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
894	decl	%ecx				/* shed our read count */
895	testl	$(LCK_RW_SHARED_MASK), %ecx
896	jne	3f				/* we were the last reader */
897	andl	$(~LCK_W_WAITING), %ecx		/* so clear the wait indicator */
8983:
899	lock
900	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
901	jne	7f
902
903#if __i386__
904	pushl	%eax				/* go check to see if we need to */
905	push	%edx				/* wakeup anyone */
906	call	EXT(lck_rw_lock_shared_to_exclusive_failure)
907	addl	$8, %esp
908#else
909	mov	%eax, %esi			/* put old flags as second arg */
910						/* lock is alread in %rdi */
911	call	EXT(lck_rw_lock_shared_to_exclusive_failure)
912#endif
913	ret					/* and pass the failure return along */
9147:
915	PAUSE
916	jmp	1b
9178:
918	jmp	EXT(lck_rw_lock_shared_to_exclusive_success)
919
920
921
922	.cstring
923rwl_release_error_str:
924	.asciz  "Releasing non-exclusive RW lock without a reader refcount!"
925	.text
926
927/*
928 *	lck_rw_type_t lck_rw_done(lck_rw_t *)
929 *
930 */
931Entry(lck_rw_done)
932	LOAD_LCK_RW_REGISTER
9331:
934	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
935	testl   $(LCK_RW_INTERLOCK), %eax
936	jne     7f				/* wait for interlock to clear */
937
938	movl	%eax, %ecx			/* keep original value in %eax for cmpxchgl */
939	testl	$(LCK_RW_SHARED_MASK), %ecx	/* if reader count == 0, must be exclusive lock */
940	je	2f
941	decl	%ecx				/* Decrement reader count */
942	testl	$(LCK_RW_SHARED_MASK), %ecx	/* if reader count has now gone to 0, check for waiters */
943	je	4f
944	jmp	6f
9452:
946	testl	$(LCK_RW_WANT_UPGRADE), %ecx
947	je	3f
948	andl	$(~LCK_RW_WANT_UPGRADE), %ecx
949	jmp	4f
9503:
951	testl	$(LCK_RW_WANT_WRITE), %ecx
952	je	8f				/* lock is not 'owned', go panic */
953	andl	$(~LCK_RW_WANT_WRITE), %ecx
9544:
955	/*
956	 * test the original values to match what
957	 * lck_rw_done_gen is going to do to determine
958	 * which wakeups need to happen...
959	 *
960	 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
961	 */
962	testl	$(LCK_W_WAITING), %eax
963	je	5f
964	andl	$(~LCK_W_WAITING), %ecx
965
966	testl	$(LCK_RW_PRIV_EXCL), %eax
967	jne	6f
9685:
969	andl	$(~LCK_R_WAITING), %ecx
9706:
971	lock
972	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
973	jne	7f
974
975#if __i386__
976	pushl	%eax
977	push	%edx
978	call	EXT(lck_rw_done_gen)
979	addl	$8, %esp
980#else
981	mov	%eax,%esi	/* old flags in %rsi */
982				/* lock is in %rdi already */
983	call	EXT(lck_rw_done_gen)
984#endif
985	ret
9867:
987	PAUSE
988	jmp	1b
9898:
990	ALIGN_STACK()
991	LOAD_STRING_ARG0(rwl_release_error_str)
992	CALL_PANIC()
993
994
995
996/*
997 *	lck_rw_type_t lck_rw_lock_exclusive_to_shared(lck_rw_t *)
998 *
999 */
1000Entry(lck_rw_lock_exclusive_to_shared)
1001	LOAD_LCK_RW_REGISTER
10021:
1003	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
1004	testl   $(LCK_RW_INTERLOCK), %eax
1005	jne     6f				/* wait for interlock to clear */
1006
1007	movl	%eax, %ecx			/* keep original value in %eax for cmpxchgl */
1008	incl	%ecx				/* Increment reader count */
1009
1010	testl	$(LCK_RW_WANT_UPGRADE), %ecx
1011	je	2f
1012	andl	$(~LCK_RW_WANT_UPGRADE), %ecx
1013	jmp	3f
10142:
1015	andl	$(~LCK_RW_WANT_WRITE), %ecx
10163:
1017	/*
1018	 * test the original values to match what
1019	 * lck_rw_lock_exclusive_to_shared_gen is going to do to determine
1020	 * which wakeups need to happen...
1021	 *
1022	 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1023	 */
1024	testl	$(LCK_W_WAITING), %eax
1025	je	4f
1026	testl	$(LCK_RW_PRIV_EXCL), %eax
1027	jne	5f
10284:
1029	andl	$(~LCK_R_WAITING), %ecx
10305:
1031	lock
1032	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
1033	jne	6f
1034
1035#if __i386__
1036	pushl	%eax
1037	push	%edx
1038	call	EXT(lck_rw_lock_exclusive_to_shared_gen)
1039	addl	$8, %esp
1040#else
1041	mov	%eax,%esi
1042	call	EXT(lck_rw_lock_exclusive_to_shared_gen)
1043#endif
1044	ret
10456:
1046	PAUSE
1047	jmp	1b
1048
1049
1050
1051/*
1052 *	int lck_rw_grab_want(lck_rw_t *)
1053 *
1054 */
1055Entry(lck_rw_grab_want)
1056	LOAD_LCK_RW_REGISTER
10571:
1058	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
1059	testl   $(LCK_RW_INTERLOCK), %eax
1060	jne     3f				/* wait for interlock to clear */
1061	testl	$(LCK_RW_WANT_WRITE), %eax	/* want_write has been grabbed by someone else */
1062	jne	2f				/* go return failure */
1063
1064	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
1065	orl	$(LCK_RW_WANT_WRITE), %ecx
1066	lock
1067	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
1068	jne	2f
1069						/* we now own want_write */
1070	movl	$1, %eax			/* return success */
1071	ret
10722:
1073	xorl	%eax, %eax			/* return failure */
1074	ret
10753:
1076	PAUSE
1077	jmp	1b
1078
1079
1080#define	RW_LOCK_SHARED_OR_UPGRADE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)
1081/*
1082 *	int lck_rw_held_read_or_upgrade(lck_rw_t *)
1083 *
1084 */
1085Entry(lck_rw_held_read_or_upgrade)
1086	LOAD_LCK_RW_REGISTER
1087	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
1088	andl	$(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax
1089	ret
1090
1091
1092
1093/*
1094 * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
1095 * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
1096 * as a 64-bit quantity (this matches the existing PowerPC implementation,
1097 * and the new x86 specific statistics are also maintained as 32-bit
1098 * quantities).
1099 *
1100 *
1101 * Enable this preprocessor define to record the first miss alone
1102 * By default, we count every miss, hence multiple misses may be
1103 * recorded for a single lock acquire attempt via lck_mtx_lock
1104 */
1105#undef LOG_FIRST_MISS_ALONE
1106
1107/*
1108 * This preprocessor define controls whether the R-M-W update of the
1109 * per-group statistics elements are atomic (LOCK-prefixed)
1110 * Enabled by default.
1111 */
1112#define ATOMIC_STAT_UPDATES 1
1113
1114#if defined(ATOMIC_STAT_UPDATES)
1115#define LOCK_IF_ATOMIC_STAT_UPDATES lock
1116#else
1117#define LOCK_IF_ATOMIC_STAT_UPDATES
1118#endif /* ATOMIC_STAT_UPDATES */
1119
1120
1121/*
1122 * For most routines, the lck_mtx_t pointer is loaded into a
1123 * register initially, and the owner field checked for indirection.
1124 * Eventually the lock owner is loaded into a register and examined.
1125 */
1126
1127#define M_OWNER		MUTEX_OWNER
1128#define M_PTR		MUTEX_PTR
1129#define M_STATE		MUTEX_STATE
1130
1131#if defined(__i386__)
1132
1133#define LMTX_ARG0	B_ARG0
1134#define LMTX_ARG1	B_ARG1
1135#define	LMTX_REG	%edx
1136#define LMTX_A_REG	%eax
1137#define LMTX_A_REG32	%eax
1138#define LMTX_C_REG	%ecx
1139#define LMTX_C_REG32	%ecx
1140#define LMTX_RET_REG	%eax
1141#define LMTX_RET_REG32	%eax
1142#define LMTX_LGROUP_REG	%esi
1143#define LMTX_SSTATE_REG	%edi
1144#define	LOAD_LMTX_REG(arg)	mov arg, LMTX_REG
1145#define LMTX_CHK_EXTENDED	cmp LMTX_REG, LMTX_ARG0
1146#define LMTX_ASSERT_OWNED	cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1
1147
1148#define LMTX_ENTER_EXTENDED					\
1149	mov	M_PTR(LMTX_REG), LMTX_REG 		;	\
1150	push	LMTX_LGROUP_REG	 		 	;	\
1151	push	LMTX_SSTATE_REG			     	;	\
1152	xor	LMTX_SSTATE_REG, LMTX_SSTATE_REG	;	\
1153	mov	MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG 	;	\
1154	LOCK_IF_ATOMIC_STAT_UPDATES			;	\
1155	addl	$1, GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)	;	\
1156	jnc	11f			    		;	\
1157	incl	GRP_MTX_STAT_UTIL+4(LMTX_LGROUP_REG)	;	\
115811:
1159
1160#define LMTX_EXIT_EXTENDED		\
1161	pop	LMTX_SSTATE_REG	;	\
1162	pop	LMTX_LGROUP_REG
1163
1164
1165#define	LMTX_CHK_EXTENDED_EXIT			\
1166	cmp 	LMTX_REG, LMTX_ARG0	;	\
1167	je	12f			;	\
1168	pop	LMTX_SSTATE_REG		;	\
1169	pop	LMTX_LGROUP_REG		;	\
117012:
1171
1172
1173#if	LOG_FIRST_MISS_ALONE
1174#define LMTX_UPDATE_MISS					\
1175	test	$1, LMTX_SSTATE_REG 			;	\
1176	jnz	11f					;	\
1177	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1178	incl	GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)	;	\
1179	or	$1, LMTX_SSTATE_REG			;	\
118011:
1181#else
1182#define LMTX_UPDATE_MISS					\
1183	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1184	incl	GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
1185#endif
1186
1187
1188#if	LOG_FIRST_MISS_ALONE
1189#define LMTX_UPDATE_WAIT					\
1190	test	$2, LMTX_SSTATE_REG 			;	\
1191	jnz	11f					;	\
1192	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1193	incl	GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)	;	\
1194	or	$2, LMTX_SSTATE_REG			;	\
119511:
1196#else
1197#define LMTX_UPDATE_WAIT					\
1198	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1199	incl	GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
1200#endif
1201
1202
1203/*
1204 * Record the "direct wait" statistic, which indicates if a
1205 * miss proceeded to block directly without spinning--occurs
1206 * if the owner of the mutex isn't running on another processor
1207 * at the time of the check.
1208 */
1209#define LMTX_UPDATE_DIRECT_WAIT					\
1210	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1211	incl	GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
1212
1213
1214#define LMTX_CALLEXT1(func_name)	\
1215	push	LMTX_REG	;	\
1216	push	LMTX_REG	;	\
1217	call	EXT(func_name)	;	\
1218	add	$4, %esp	;	\
1219	pop	LMTX_REG
1220
1221#define LMTX_CALLEXT2(func_name, reg)	\
1222	push	LMTX_REG	;	\
1223	push	reg		;	\
1224	push	LMTX_REG	;	\
1225	call	EXT(func_name)	;	\
1226	add	$8, %esp	;	\
1227	pop	LMTX_REG
1228
1229#elif defined(__x86_64__)
1230
1231#define LMTX_ARG0	%rdi
1232#define LMTX_ARG1	%rsi
1233#define LMTX_REG_ORIG	%rdi
1234#define	LMTX_REG	%rdx
1235#define LMTX_A_REG	%rax
1236#define LMTX_A_REG32	%eax
1237#define LMTX_C_REG	%rcx
1238#define LMTX_C_REG32	%ecx
1239#define LMTX_RET_REG	%rax
1240#define LMTX_RET_REG32	%eax
1241#define LMTX_LGROUP_REG	%r10
1242#define LMTX_SSTATE_REG	%r11
1243#define	LOAD_LMTX_REG(arg)	mov %rdi, %rdx
1244#define LMTX_CHK_EXTENDED	cmp LMTX_REG, LMTX_REG_ORIG
1245#define LMTX_ASSERT_OWNED	cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1
1246
1247#define LMTX_ENTER_EXTENDED					\
1248	mov	M_PTR(LMTX_REG), LMTX_REG 		;	\
1249	xor	LMTX_SSTATE_REG, LMTX_SSTATE_REG	;	\
1250	mov	MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG 	;	\
1251	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1252	incq	GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)
1253
1254#define LMTX_EXIT_EXTENDED
1255
1256#define	LMTX_CHK_EXTENDED_EXIT
1257
1258
1259#if	LOG_FIRST_MISS_ALONE
1260#define LMTX_UPDATE_MISS					\
1261	test	$1, LMTX_SSTATE_REG 			;	\
1262	jnz	11f					;	\
1263	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1264	incl	GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)	;	\
1265	or	$1, LMTX_SSTATE_REG			;	\
126611:
1267#else
1268#define LMTX_UPDATE_MISS					\
1269	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1270	incl	GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
1271#endif
1272
1273
1274#if	LOG_FIRST_MISS_ALONE
1275#define LMTX_UPDATE_WAIT					\
1276	test	$2, LMTX_SSTATE_REG 			;	\
1277	jnz	11f					;	\
1278	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1279	incl	GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)	;	\
1280	or	$2, LMTX_SSTATE_REG			;	\
128111:
1282#else
1283#define LMTX_UPDATE_WAIT					\
1284	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1285	incl	GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
1286#endif
1287
1288
1289/*
1290 * Record the "direct wait" statistic, which indicates if a
1291 * miss proceeded to block directly without spinning--occurs
1292 * if the owner of the mutex isn't running on another processor
1293 * at the time of the check.
1294 */
1295#define LMTX_UPDATE_DIRECT_WAIT					\
1296	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1297	incl	GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
1298
1299
1300#define LMTX_CALLEXT1(func_name)		\
1301	LMTX_CHK_EXTENDED		;	\
1302	je	12f			;	\
1303	push	LMTX_LGROUP_REG		;	\
1304	push	LMTX_SSTATE_REG		;	\
130512:	push	LMTX_REG_ORIG		;	\
1306	push	LMTX_REG		;	\
1307	mov	LMTX_REG, LMTX_ARG0	;	\
1308	call	EXT(func_name)		;	\
1309	pop	LMTX_REG		;	\
1310	pop	LMTX_REG_ORIG		;	\
1311	LMTX_CHK_EXTENDED		;	\
1312	je	12f			;	\
1313	pop	LMTX_SSTATE_REG		;	\
1314	pop	LMTX_LGROUP_REG		;	\
131512:
1316
1317#define LMTX_CALLEXT2(func_name, reg)		\
1318	LMTX_CHK_EXTENDED		;	\
1319	je	12f			;	\
1320	push	LMTX_LGROUP_REG		;	\
1321	push	LMTX_SSTATE_REG		;	\
132212:	push	LMTX_REG_ORIG		;	\
1323	push	LMTX_REG		;	\
1324	mov	reg, LMTX_ARG1		;	\
1325	mov	LMTX_REG, LMTX_ARG0	;	\
1326	call	EXT(func_name)		;	\
1327	pop	LMTX_REG		;	\
1328	pop	LMTX_REG_ORIG		;	\
1329	LMTX_CHK_EXTENDED		;	\
1330	je	12f			;	\
1331	pop	LMTX_SSTATE_REG		;	\
1332	pop	LMTX_LGROUP_REG		;	\
133312:
1334
1335#else
1336#error Unsupported architecture
1337#endif
1338
1339
1340#define M_WAITERS_MSK		0x0000ffff
1341#define M_PRIORITY_MSK		0x00ff0000
1342#define M_ILOCKED_MSK		0x01000000
1343#define M_MLOCKED_MSK		0x02000000
1344#define M_PROMOTED_MSK		0x04000000
1345#define M_SPIN_MSK		0x08000000
1346
1347/*
1348 *	void lck_mtx_assert(lck_mtx_t* l, unsigned int)
1349 *	Takes the address of a lock, and an assertion type as parameters.
1350 *	The assertion can take one of two forms determine by the type
1351 *	parameter: either the lock is held by the current thread, and the
1352 *	type is	LCK_MTX_ASSERT_OWNED, or it isn't and the type is
1353 *	LCK_MTX_ASSERT_NOTOWNED. Calls panic on assertion failure.
1354 *
1355 */
1356
1357NONLEAF_ENTRY(lck_mtx_assert)
1358        LOAD_LMTX_REG(B_ARG0)	                   	/* Load lock address */
1359	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG	/* Load current thread */
1360
1361	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1362	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1363	jne	0f
1364	mov	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
13650:
1366	mov	M_OWNER(LMTX_REG), LMTX_C_REG	/* Load owner */
1367	LMTX_ASSERT_OWNED
1368	jne	2f				/* Assert ownership? */
1369	cmp	LMTX_A_REG, LMTX_C_REG		/* Current thread match? */
1370	jne	3f				/* no, go panic */
1371	testl	$(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG)
1372	je	3f
13731:						/* yes, we own it */
1374	NONLEAF_RET
13752:
1376	cmp	LMTX_A_REG, LMTX_C_REG		/* Current thread match? */
1377	jne	1b				/* No, return */
1378	ALIGN_STACK()
1379	LOAD_PTR_ARG1(LMTX_REG)
1380	LOAD_STRING_ARG0(mutex_assert_owned_str)
1381	jmp	4f
13823:
1383	ALIGN_STACK()
1384	LOAD_PTR_ARG1(LMTX_REG)
1385	LOAD_STRING_ARG0(mutex_assert_not_owned_str)
13864:
1387	CALL_PANIC()
1388
1389
1390lck_mtx_destroyed:
1391	ALIGN_STACK()
1392	LOAD_PTR_ARG1(LMTX_REG)
1393	LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
1394	CALL_PANIC()
1395
1396
1397.data
1398mutex_assert_not_owned_str:
1399	.asciz	"mutex (%p) not owned\n"
1400mutex_assert_owned_str:
1401	.asciz	"mutex (%p) owned\n"
1402mutex_interlock_destroyed_str:
1403	.asciz	"trying to interlock destroyed mutex (%p)"
1404.text
1405
1406
1407
1408/*
1409 * lck_mtx_lock()
1410 * lck_mtx_try_lock()
1411 * lck_mtx_unlock()
1412 * lck_mtx_lock_spin()
1413 * lck_mtx_lock_spin_always()
1414 * lck_mtx_convert_spin()
1415 */
1416NONLEAF_ENTRY(lck_mtx_lock_spin_always)
1417	LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
1418	jmp     Llmls_avoid_check
1419
1420NONLEAF_ENTRY(lck_mtx_lock_spin)
1421	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
1422
1423	CHECK_PREEMPTION_LEVEL()
1424Llmls_avoid_check:
1425	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1426	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1427	jnz	Llmls_slow
1428Llmls_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1429	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1430	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
1431
1432	PREEMPTION_DISABLE
1433	lock
1434	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1435	jne	Llmls_busy_disabled
1436
1437 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1438	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of interlock */
1439#if	MACH_LDEBUG
1440	test	LMTX_A_REG, LMTX_A_REG
1441	jz	1f
1442	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
14431:
1444#endif	/* MACH_LDEBUG */
1445
1446	LMTX_CHK_EXTENDED_EXIT
1447	/* return with the interlock held and preemption disabled */
1448	leave
1449#if	CONFIG_DTRACE
1450	LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point)
1451	ret
1452	/* inherit lock pointer in LMTX_REG above */
1453	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG)
1454#endif
1455	ret
1456
1457Llmls_slow:
1458	test	$M_ILOCKED_MSK, LMTX_C_REG32		/* is the interlock held */
1459	jz	Llml_contended				/* no, must have been the mutex */
1460
1461	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if its marked destroyed */
1462	je	lck_mtx_destroyed
1463	cmp	$(MUTEX_IND), LMTX_C_REG32		/* Is this an indirect mutex */
1464	jne	Llmls_loop				/* no... must be interlocked */
1465
1466	LMTX_ENTER_EXTENDED
1467
1468	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1469	test	$(M_SPIN_MSK), LMTX_C_REG32
1470	jz	Llmls_loop1
1471
1472	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1473Llmls_loop:
1474	PAUSE
1475	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1476Llmls_loop1:
1477	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1478	jz	Llmls_try
1479	test	$(M_MLOCKED_MSK), LMTX_C_REG32
1480	jnz	Llml_contended				/* mutex owned by someone else, go contend for it */
1481	jmp	Llmls_loop
1482
1483Llmls_busy_disabled:
1484	PREEMPTION_ENABLE
1485	jmp	Llmls_loop
1486
1487
1488
1489NONLEAF_ENTRY(lck_mtx_lock)
1490	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
1491
1492	CHECK_PREEMPTION_LEVEL()
1493
1494	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1495	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1496	jnz	Llml_slow
1497Llml_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1498	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1499	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1500
1501	PREEMPTION_DISABLE
1502	lock
1503	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1504	jne	Llml_busy_disabled
1505
1506 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1507	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1508#if	MACH_LDEBUG
1509	test	LMTX_A_REG, LMTX_A_REG
1510	jz	1f
1511	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
15121:
1513#endif	/* MACH_LDEBUG */
1514
1515	testl	$(M_WAITERS_MSK), M_STATE(LMTX_REG)
1516	jz	Llml_finish
1517
1518	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
1519
1520Llml_finish:
1521	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
1522	PREEMPTION_ENABLE
1523
1524	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1525	jne	2f
1526
1527	leave
1528#if	CONFIG_DTRACE
1529	LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
1530	ret
1531	/* inherit lock pointer in LMTX_REG above */
1532	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG)
1533#endif
1534	ret
15352:
1536	LMTX_EXIT_EXTENDED
1537	leave
1538#if	CONFIG_DTRACE
1539	LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point)
1540	ret
1541	/* inherit lock pointer in LMTX_REG above */
1542	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG)
1543#endif
1544	ret
1545
1546
1547Llml_slow:
1548	test	$M_ILOCKED_MSK, LMTX_C_REG32		/* is the interlock held */
1549	jz	Llml_contended				/* no, must have been the mutex */
1550
1551	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if its marked destroyed */
1552	je	lck_mtx_destroyed
1553	cmp	$(MUTEX_IND), LMTX_C_REG32		/* Is this an indirect mutex? */
1554	jne	Llml_loop				/* no... must be interlocked */
1555
1556	LMTX_ENTER_EXTENDED
1557
1558	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1559	test	$(M_SPIN_MSK), LMTX_C_REG32
1560	jz	Llml_loop1
1561
1562	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1563Llml_loop:
1564	PAUSE
1565	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1566Llml_loop1:
1567	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1568	jz	Llml_try
1569	test	$(M_MLOCKED_MSK), LMTX_C_REG32
1570	jnz	Llml_contended				/* mutex owned by someone else, go contend for it */
1571	jmp	Llml_loop
1572
1573Llml_busy_disabled:
1574	PREEMPTION_ENABLE
1575	jmp	Llml_loop
1576
1577
1578Llml_contended:
1579	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1580	je	0f
1581	LMTX_UPDATE_MISS
15820:
1583	LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)
1584
1585	test	LMTX_RET_REG, LMTX_RET_REG
1586	jz	Llml_acquired		/* acquired mutex, interlock held and preemption disabled */
1587
1588	cmp	$1, LMTX_RET_REG	/* check for direct wait status */
1589	je	2f
1590	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1591	je	2f
1592	LMTX_UPDATE_DIRECT_WAIT
15932:
1594	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1595	test	$(M_ILOCKED_MSK), LMTX_C_REG32
1596	jnz	6f
1597
1598	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1599	or	$(M_ILOCKED_MSK), LMTX_C_REG32	/* try to take the interlock */
1600
1601	PREEMPTION_DISABLE
1602	lock
1603	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1604	jne	5f
1605
1606	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* we've got the interlock and */
1607	jnz	3f
1608	or	$(M_MLOCKED_MSK), LMTX_C_REG32	/* the mutex is free... grab it directly */
1609	mov	LMTX_C_REG32, M_STATE(LMTX_REG)
1610
1611 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1612	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1613#if	MACH_LDEBUG
1614	test	LMTX_A_REG, LMTX_A_REG
1615	jz	1f
1616	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
16171:
1618#endif	/* MACH_LDEBUG */
1619
1620Llml_acquired:
1621	testl	$(M_WAITERS_MSK), M_STATE(LMTX_REG)
1622	jnz	1f
1623	mov	M_OWNER(LMTX_REG), LMTX_A_REG
1624	mov	TH_WAS_PROMOTED_ON_WAKEUP(LMTX_A_REG), LMTX_A_REG32
1625	test	LMTX_A_REG32, LMTX_A_REG32
1626	jz	Llml_finish
16271:
1628	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
1629	jmp	Llml_finish
1630
16313:					/* interlock held, mutex busy */
1632	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1633	je	4f
1634	LMTX_UPDATE_WAIT
16354:
1636	LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
1637	jmp	Llml_contended
16385:
1639	PREEMPTION_ENABLE
16406:
1641	PAUSE
1642	jmp	2b
1643
1644
1645
1646NONLEAF_ENTRY(lck_mtx_try_lock_spin)
1647	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1648
1649	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1650	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1651	jnz	Llmts_slow
1652Llmts_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1653	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1654	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
1655
1656	PREEMPTION_DISABLE
1657	lock
1658	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1659	jne	Llmts_busy_disabled
1660
1661 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1662	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1663#if	MACH_LDEBUG
1664	test	LMTX_A_REG, LMTX_A_REG
1665	jz	1f
1666	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
16671:
1668#endif	/* MACH_LDEBUG */
1669
1670	LMTX_CHK_EXTENDED_EXIT
1671	leave
1672
1673#if	CONFIG_DTRACE
1674	mov	$1, LMTX_RET_REG	/* return success */
1675	LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point)
1676	ret
1677	/* inherit lock pointer in LMTX_REG above */
1678	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG)
1679#endif
1680	mov	$1, LMTX_RET_REG	/* return success */
1681	ret
1682
1683Llmts_slow:
1684	test	$(M_ILOCKED_MSK), LMTX_C_REG32	/* is the interlock held */
1685	jz	Llmts_fail			/* no, must be held as a mutex */
1686
1687	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if its marked destroyed */
1688	je	lck_mtx_destroyed
1689	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1690	jne	Llmts_loop1
1691
1692	LMTX_ENTER_EXTENDED
1693Llmts_loop:
1694	PAUSE
1695	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1696Llmts_loop1:
1697	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
1698	jnz	Llmts_fail
1699	test	$(M_ILOCKED_MSK), LMTX_C_REG32
1700	jz	Llmts_try
1701	jmp	Llmts_loop
1702
1703Llmts_busy_disabled:
1704	PREEMPTION_ENABLE
1705	jmp	Llmts_loop
1706
1707
1708
1709NONLEAF_ENTRY(lck_mtx_try_lock)
1710	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1711
1712	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1713	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1714	jnz	Llmt_slow
1715Llmt_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1716	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1717	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1718
1719	PREEMPTION_DISABLE
1720	lock
1721	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1722	jne	Llmt_busy_disabled
1723
1724 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1725	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1726#if	MACH_LDEBUG
1727	test	LMTX_A_REG, LMTX_A_REG
1728	jz	1f
1729	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
17301:
1731#endif	/* MACH_LDEBUG */
1732
1733	LMTX_CHK_EXTENDED_EXIT
1734
1735	test	$(M_WAITERS_MSK), LMTX_C_REG32
1736	jz	0f
1737
1738	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
17390:
1740	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
1741	PREEMPTION_ENABLE
1742
1743	leave
1744#if	CONFIG_DTRACE
1745	mov	$1, LMTX_RET_REG		/* return success */
1746	/* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
1747	LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point)
1748	ret
1749	/* inherit lock pointer in LMTX_REG from above */
1750	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG)
1751#endif
1752	mov	$1, LMTX_RET_REG		/* return success */
1753	ret
1754
1755Llmt_slow:
1756	test	$(M_ILOCKED_MSK), LMTX_C_REG32	/* is the interlock held */
1757	jz	Llmt_fail			/* no, must be held as a mutex */
1758
1759	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if its marked destroyed */
1760	je	lck_mtx_destroyed
1761	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1762	jne	Llmt_loop
1763
1764	LMTX_ENTER_EXTENDED
1765Llmt_loop:
1766	PAUSE
1767	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1768Llmt_loop1:
1769	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
1770	jnz	Llmt_fail
1771	test	$(M_ILOCKED_MSK), LMTX_C_REG32
1772	jz	Llmt_try
1773	jmp	Llmt_loop
1774
1775Llmt_busy_disabled:
1776	PREEMPTION_ENABLE
1777	jmp	Llmt_loop
1778
1779
1780Llmt_fail:
1781Llmts_fail:
1782	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1783	je	0f
1784	LMTX_UPDATE_MISS
1785	LMTX_EXIT_EXTENDED
17860:
1787	xor	LMTX_RET_REG, LMTX_RET_REG
1788	NONLEAF_RET
1789
1790
1791
1792NONLEAF_ENTRY(lck_mtx_convert_spin)
1793	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1794
1795	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1796	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1797	jne	0f
1798	mov	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
1799	mov	M_STATE(LMTX_REG), LMTX_C_REG32
18000:
1801	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* already owned as a mutex, just return */
1802	jnz	2f
1803	test	$(M_WAITERS_MSK), LMTX_C_REG32	/* are there any waiters? */
1804	jz	1f
1805
1806	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
1807	mov	M_STATE(LMTX_REG), LMTX_C_REG32
18081:
1809	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG32	/* convert from spin version to mutex */
1810	or	$(M_MLOCKED_MSK), LMTX_C_REG32
1811	mov	LMTX_C_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
1812
1813	PREEMPTION_ENABLE
18142:
1815	NONLEAF_RET
1816
1817
1818
1819#if	defined(__i386__)
1820NONLEAF_ENTRY(lck_mtx_unlock)
1821	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1822	mov	M_OWNER(LMTX_REG), LMTX_A_REG
1823	test	LMTX_A_REG, LMTX_A_REG
1824	jnz	Llmu_entry
1825	leave
1826	ret
1827NONLEAF_ENTRY(lck_mtx_unlock_darwin10)
1828#else
1829NONLEAF_ENTRY(lck_mtx_unlock)
1830#endif
1831	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1832Llmu_entry:
1833	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1834Llmu_prim:
1835	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1836	je	Llmu_ext
1837
1838Llmu_chktype:
1839	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* check for full mutex */
1840	jz	Llmu_unlock
1841Llmu_mutex:
1842	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
1843	jnz	Llmu_busy
1844
1845	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1846	and	$(~M_MLOCKED_MSK), LMTX_C_REG32	/* drop mutex */
1847	or	$(M_ILOCKED_MSK), LMTX_C_REG32	/* pick up interlock */
1848
1849	PREEMPTION_DISABLE
1850	lock
1851	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1852	jne	Llmu_busy_disabled		/* branch on failure to spin loop */
1853
1854Llmu_unlock:
1855	xor	LMTX_A_REG, LMTX_A_REG
1856	mov	LMTX_A_REG, M_OWNER(LMTX_REG)
1857	mov	LMTX_C_REG, LMTX_A_REG			/* keep original state in %ecx for later evaluation */
1858	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG
1859
1860	test	$(M_WAITERS_MSK), LMTX_A_REG32
1861	jz	2f
1862	dec	LMTX_A_REG32				/* decrement waiter count */
18632:
1864	mov	LMTX_A_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
1865
1866#if	MACH_LDEBUG
1867	/* perform lock statistics after drop to prevent delay */
1868	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1869	test	LMTX_A_REG, LMTX_A_REG
1870	jz	1f
1871	decl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
18721:
1873#endif	/* MACH_LDEBUG */
1874
1875	test	$(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG32
1876	jz	3f
1877
1878	LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG)
18793:
1880	PREEMPTION_ENABLE
1881
1882	LMTX_CHK_EXTENDED
1883	jne	4f
1884
1885	leave
1886#if	CONFIG_DTRACE
1887	/* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */
1888	LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
1889	ret
1890	/* inherit lock pointer in LMTX_REG from above */
1891	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG)
1892#endif
1893	ret
18944:
1895	leave
1896#if	CONFIG_DTRACE
1897	/* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
1898	LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
1899	ret
1900	/* inherit lock pointer in LMTX_REG from above */
1901	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG)
1902#endif
1903	ret
1904
1905
1906Llmu_busy_disabled:
1907	PREEMPTION_ENABLE
1908Llmu_busy:
1909	PAUSE
1910	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1911	jmp	Llmu_mutex
1912
1913Llmu_ext:
1914	mov	M_PTR(LMTX_REG), LMTX_REG
1915	mov	M_OWNER(LMTX_REG), LMTX_A_REG
1916	mov	%gs:CPU_ACTIVE_THREAD, LMTX_C_REG
1917	CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG)
1918	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1919	jmp 	Llmu_chktype
1920
1921
1922
1923LEAF_ENTRY(lck_mtx_ilk_unlock)
1924	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
1925
1926	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
1927
1928	PREEMPTION_ENABLE			/* need to re-enable preemption */
1929
1930	LEAF_RET
1931


LEAF_ENTRY(lck_mtx_lock_grab_mutex)
	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */

	mov	M_STATE(LMTX_REG), LMTX_C_REG32

	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* can't have the mutex yet */
	jnz	3f

	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32

	PREEMPTION_DISABLE
	lock
	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
	jne	2f				/* branch on failure to spin loop */

 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
#if	MACH_LDEBUG
	test	LMTX_A_REG, LMTX_A_REG
	jz	1f
	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	mov	$1, LMTX_RET_REG		/* return success */
	LEAF_RET
2:
	PREEMPTION_ENABLE
3:
	xor	LMTX_RET_REG, LMTX_RET_REG	/* return failure */
	LEAF_RET

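/*
 * lck_mtx_lock_grab_mutex is a single-shot try-acquire.  Sketch in C; cas32()
 * and the field names are illustrative stand-ins:
 *
 *	state = lock->state;
 *	if (state & (M_ILOCKED | M_MLOCKED))
 *		return 0;				// lock or interlock busy
 *	disable_preemption();
 *	if (!cas32(&lock->state, state, state | M_ILOCKED | M_MLOCKED)) {
 *		enable_preemption();
 *		return 0;				// lost the race
 *	}
 *	lock->owner = current_thread();
 *	return 1;				// success: preemption stays disabled
 */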


LEAF_ENTRY(lck_mtx_lock_mark_destroyed)
	LOAD_LMTX_REG(L_ARG0)
1:
	mov	M_STATE(LMTX_REG), LMTX_C_REG32
	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
	jne	2f

	movl	$(MUTEX_DESTROYED), M_STATE(LMTX_REG)	/* convert to destroyed state */
	jmp	3f
2:
	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
	jnz	5f

	PREEMPTION_DISABLE
	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK), LMTX_C_REG32
	lock
	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
	jne	4f				/* branch on failure to spin loop */
	movl	$(MUTEX_DESTROYED), M_STATE(LMTX_REG)	/* convert to destroyed state */
	PREEMPTION_ENABLE
3:
	LEAF_RET				/* return with M_ILOCKED set */
4:
	PREEMPTION_ENABLE
5:
	PAUSE
	jmp	1b

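/*
 * lck_mtx_lock_mark_destroyed, sketched in C (illustrative helper and field
 * names).  Indirect mutexes are stamped MUTEX_DESTROYED directly; primitive
 * ones take the interlock first:
 *
 *	for (;;) {
 *		state = lock->state;
 *		if (state == MUTEX_IND) {
 *			lock->state = MUTEX_DESTROYED;
 *			return;
 *		}
 *		if (!(state & M_ILOCKED)) {
 *			disable_preemption();
 *			if (cas32(&lock->state, state, state | M_ILOCKED)) {
 *				lock->state = MUTEX_DESTROYED;
 *				enable_preemption();
 *				return;
 *			}
 *			enable_preemption();
 *		}
 *		cpu_pause();				// PAUSE, then retry
 *	}
 */
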
LEAF_ENTRY(preemption_underflow_panic)
	FRAME
	incl	%gs:CPU_PREEMPTION_LEVEL
	ALIGN_STACK()
	LOAD_STRING_ARG0(16f)
	CALL_PANIC()
	hlt
	.data
16:	String	"Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
	.text


LEAF_ENTRY(_disable_preemption)
#if	MACH_RT
	PREEMPTION_DISABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_enable_preemption)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
#if __i386__
	pushl	%gs:CPU_PREEMPTION_LEVEL
#else
	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
#endif
	ALIGN_STACK()
	LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_enable_preemption_less_than_zero:
	.asciz	"_enable_preemption: preemption_level(%d)  < 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	PREEMPTION_ENABLE
#endif	/* MACH_RT */
	LEAF_RET

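/*
 * With MACH_ASSERT, the enable path above sanity-checks the per-CPU
 * preemption level before lowering it.  Sketch (accessor name illustrative):
 *
 *	if (cpu_preemption_level() <= 0)
 *		panic("_enable_preemption: preemption_level(%d)  < 0!",
 *		      cpu_preemption_level());
 *	// PREEMPTION_ENABLE then decrements the level and, once it reaches
 *	// zero, allows any pending preemption to be taken
 */
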
LEAF_ENTRY(_enable_preemption_no_check)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
	ALIGN_STACK()
	LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_enable_preemption_no_check_less_than_zero:
	.asciz	"_enable_preemption_no_check: preemption_level <= 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	_ENABLE_PREEMPTION_NO_CHECK
#endif	/* MACH_RT */
	LEAF_RET


LEAF_ENTRY(_mp_disable_preemption)
#if	MACH_RT
	PREEMPTION_DISABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_mp_enable_preemption)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
#if __i386__
	pushl	%gs:CPU_PREEMPTION_LEVEL
#else
	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
#endif
	ALIGN_STACK()
	LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_mp_enable_preemption_less_than_zero:
	.asciz "_mp_enable_preemption: preemption_level (%d) <= 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	PREEMPTION_ENABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_mp_enable_preemption_no_check)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
	ALIGN_STACK()
	LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_mp_enable_preemption_no_check_less_than_zero:
	.asciz "_mp_enable_preemption_no_check: preemption_level <= 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	_ENABLE_PREEMPTION_NO_CHECK
#endif	/* MACH_RT */
	LEAF_RET

#if __i386__

LEAF_ENTRY(i_bit_set)
	movl	L_ARG0,%edx
	movl	L_ARG1,%eax
	lock
	bts	%edx,(%eax)
	LEAF_RET

LEAF_ENTRY(i_bit_clear)
	movl	L_ARG0,%edx
	movl	L_ARG1,%eax
	lock
	btr	%edx,(%eax)
	LEAF_RET


LEAF_ENTRY(bit_lock)
	movl	L_ARG0,%ecx
	movl	L_ARG1,%eax
1:
	lock
	bts	%ecx,(%eax)
	jb	1b
	LEAF_RET


LEAF_ENTRY(bit_lock_try)
	movl	L_ARG0,%ecx
	movl	L_ARG1,%eax
	lock
	bts	%ecx,(%eax)
	jb	bit_lock_failed
	LEAF_RET		/* %eax better not be null ! */
bit_lock_failed:
	xorl	%eax,%eax
	LEAF_RET
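
/*
 * bts copies the bit's previous value into CF, so "jb" detects a bit that was
 * already set: bit_lock spins until it observed the bit clear, while
 * bit_lock_try makes one attempt and returns zero on failure (on success this
 * 32-bit version returns the non-NULL bit-string pointer itself, hence the
 * comment above).  Sketch in C; the argument order follows the L_ARG0/L_ARG1
 * loads above, the prototype is otherwise illustrative:
 *
 *	int bit_lock_try(unsigned int bit, char *l)
 *	{
 *		unsigned int mask = 1u << (bit & 31);
 *		unsigned int old = __sync_fetch_and_or(
 *		    (unsigned int *)l + (bit >> 5), mask);
 *		return (old & mask) == 0;
 *	}
 */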

LEAF_ENTRY(bit_unlock)
	movl	L_ARG0,%ecx
	movl	L_ARG1,%eax
	lock
	btr	%ecx,(%eax)
	LEAF_RET

/*
 * Atomic primitives, prototyped in kern/simple_lock.h
 */
LEAF_ENTRY(hw_atomic_add)
	movl	L_ARG0, %ecx		/* Load address of operand */
	movl	L_ARG1, %eax		/* Load addend */
	movl	%eax, %edx
	lock
	xaddl	%eax, (%ecx)		/* Atomic exchange and add */
	addl	%edx, %eax		/* Calculate result */
	LEAF_RET
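
/*
 * xaddl leaves the operand's previous value in %eax; adding back the addend
 * saved in %edx yields the updated value, so hw_atomic_add returns the
 * post-add result.  Sketch, assuming the kern/simple_lock.h prototype takes
 * (volatile uint32_t *, uint32_t):
 *
 *	uint32_t hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
 *	{
 *		return __sync_add_and_fetch(dest, delt);	// new value
 *	}
 */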

LEAF_ENTRY(hw_atomic_sub)
	movl	L_ARG0, %ecx		/* Load address of operand */
	movl	L_ARG1, %eax		/* Load subtrahend */
	negl	%eax
	movl	%eax, %edx
	lock
	xaddl	%eax, (%ecx)		/* Atomic exchange and add */
	addl	%edx, %eax		/* Calculate result */
	LEAF_RET

LEAF_ENTRY(hw_atomic_or)
	movl	L_ARG0, %ecx		/* Load address of operand */
	movl	(%ecx), %eax
1:
	movl	L_ARG1, %edx		/* Load mask */
	orl	%eax, %edx
	lock
	cmpxchgl	%edx, (%ecx)	/* Atomic CAS */
	jne	1b
	movl	%edx, %eax		/* Result */
	LEAF_RET
/*
 * A variant of hw_atomic_or which doesn't return a value.
 * The implementation is thus comparatively more efficient.
 */

LEAF_ENTRY(hw_atomic_or_noret)
	movl	L_ARG0, %ecx		/* Load address of operand */
	movl	L_ARG1, %edx		/* Load mask */
	lock
	orl	%edx, (%ecx)		/* Atomic OR */
	LEAF_RET
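
/*
 * The value-returning hw_atomic_or above needs a cmpxchg loop so it can hand
 * back the updated word, while hw_atomic_or_noret can use a single locked
 * orl.  Sketch, with the same assumed (volatile uint32_t *, uint32_t)
 * prototype as above:
 *
 *	uint32_t hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
 *	{
 *		return __sync_or_and_fetch(dest, mask);		// new value
 *	}
 *
 *	void hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
 *	{
 *		(void)__sync_fetch_and_or(dest, mask);		// no result needed
 *	}
 */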

LEAF_ENTRY(hw_atomic_and)
	movl	L_ARG0, %ecx		/* Load address of operand */
	movl	(%ecx), %eax
1:
	movl	L_ARG1, %edx		/* Load mask */
	andl	%eax, %edx
	lock
	cmpxchgl	%edx, (%ecx)	/* Atomic CAS */
	jne	1b
	movl	%edx, %eax		/* Result */
	LEAF_RET
/*
 * A variant of hw_atomic_and which doesn't return a value.
 * The implementation is thus comparatively more efficient.
 */

LEAF_ENTRY(hw_atomic_and_noret)
	movl	L_ARG0, %ecx		/* Load address of operand */
	movl	L_ARG1, %edx		/* Load mask */
	lock
	andl	%edx, (%ecx)		/* Atomic AND */
	LEAF_RET

#else /* !__i386__ */

LEAF_ENTRY(i_bit_set)
	lock
	bts	%edi,(%rsi)
	LEAF_RET

LEAF_ENTRY(i_bit_clear)
	lock
	btr	%edi,(%rsi)
	LEAF_RET


LEAF_ENTRY(bit_lock)
1:
	lock
	bts	%edi,(%rsi)
	jb	1b
	LEAF_RET


LEAF_ENTRY(bit_lock_try)
	lock
	bts	%edi,(%rsi)
	jb	bit_lock_failed
	movl	$1, %eax
	LEAF_RET
bit_lock_failed:
	xorl	%eax,%eax
	LEAF_RET

LEAF_ENTRY(bit_unlock)
	lock
	btr	%edi,(%rsi)
	LEAF_RET


/*
 * Atomic primitives, prototyped in kern/simple_lock.h
 */
LEAF_ENTRY(hw_atomic_add)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	movl	%esi, %eax		/* Load addend */
	lock 	xaddl %eax, (%rdi)		/* Atomic exchange and add */
	addl	%esi, %eax		/* Calculate result */
	LEAF_RET
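
/*
 * Under MACH_LDEBUG, the 64-bit variants first verify that the operand is
 * 4-byte aligned ("test $3, %rdi") and fault via ud2 (invalid opcode) if it
 * is not, catching misaligned callers in debug kernels.  Roughly:
 *
 *	if ((uintptr_t)dest & 3)
 *		__builtin_trap();		// corresponds to the ud2 above
 */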

LEAF_ENTRY(hw_atomic_sub)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	negl	%esi
	movl	%esi, %eax
	lock	xaddl %eax, (%rdi)		/* Atomic exchange and add */
	addl	%esi, %eax		/* Calculate result */
	LEAF_RET

LEAF_ENTRY(hw_atomic_or)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	movl	(%rdi), %eax
1:
	movl	%esi, %edx		/* Load mask */
	orl	%eax, %edx
	lock	cmpxchgl %edx, (%rdi)	/* Atomic CAS */
	jne	1b
	movl	%edx, %eax		/* Result */
	LEAF_RET
/*
 * A variant of hw_atomic_or which doesn't return a value.
 * The implementation is thus comparatively more efficient.
 */

LEAF_ENTRY(hw_atomic_or_noret)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	lock
	orl	%esi, (%rdi)		/* Atomic OR */
	LEAF_RET


LEAF_ENTRY(hw_atomic_and)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	movl	(%rdi), %eax
1:
	movl	%esi, %edx		/* Load mask */
	andl	%eax, %edx
	lock	cmpxchgl %edx, (%rdi)	/* Atomic CAS */
	jne	1b
	movl	%edx, %eax		/* Result */
	LEAF_RET
/*
 * A variant of hw_atomic_and which doesn't return a value.
 * The implementation is thus comparatively more efficient.
 */

LEAF_ENTRY(hw_atomic_and_noret)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	lock	andl	%esi, (%rdi)		/* Atomic AND */
	LEAF_RET

#endif /* !__i386__ */
