1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1989 Carnegie-Mellon University
34 * All rights reserved.  The CMU software License Agreement specifies
35 * the terms and conditions for use and redistribution.
36 */
37
38#include <mach_rt.h>
39#include <platforms.h>
40#include <mach_ldebug.h>
41#include <i386/asm.h>
42#include <i386/eflags.h>
43#include <i386/trap.h>
44#include <config_dtrace.h>
45#include <i386/mp.h>
46
47#include "assym.s"
48
49#define	PAUSE		rep; nop
50
51#include <i386/pal_lock_asm.h>
52
53/*
54 *	When performance isn't the only concern, it's
55 *	nice to build stack frames...
56 */
57#define	BUILD_STACK_FRAMES   (GPROF)
58
59#if	BUILD_STACK_FRAMES
60
61/* Stack-frame-relative: */
62#define	L_PC		B_PC
63#define	L_ARG0		B_ARG0
64#define	L_ARG1		B_ARG1
65
66#define LEAF_ENTRY(name)	\
67	Entry(name);		\
68	FRAME;			\
69	MCOUNT
70
71#define LEAF_ENTRY2(n1,n2)	\
72	Entry(n1);		\
73	Entry(n2);		\
74	FRAME;			\
75	MCOUNT
76
77#define LEAF_RET		\
78	EMARF;			\
79	ret
80
81#else	/* BUILD_STACK_FRAMES */
82
83/* Stack-pointer-relative: */
84#define	L_PC		S_PC
85#define	L_ARG0		S_ARG0
86#define	L_ARG1		S_ARG1
87
88#define LEAF_ENTRY(name)	\
89	Entry(name)
90
91#define LEAF_ENTRY2(n1,n2)	\
92	Entry(n1);		\
93	Entry(n2)
94
95#define LEAF_RET		\
96	ret
97
98#endif	/* BUILD_STACK_FRAMES */
99
100
101/* Non-leaf routines always have a stack frame: */
102
103#define NONLEAF_ENTRY(name)	\
104	Entry(name);		\
105	FRAME;			\
106	MCOUNT
107
108#define NONLEAF_ENTRY2(n1,n2)	\
109	Entry(n1);		\
110	Entry(n2);		\
111	FRAME;			\
112	MCOUNT
113
114#define NONLEAF_RET		\
115	EMARF;			\
116	ret
117
118
/* For x86_64, the varargs ABI requires that %al indicate
 * how many SSE registers contain arguments. In our case, 0. */
121#define ALIGN_STACK() 		and  $0xFFFFFFFFFFFFFFF0, %rsp ;
122#define LOAD_STRING_ARG0(label)	leaq label(%rip), %rdi ;
123#define LOAD_ARG1(x)		mov x, %esi ;
124#define LOAD_PTR_ARG1(x)	mov x, %rsi ;
125#define CALL_PANIC()		xorb %al,%al ; call EXT(panic) ;
126
127#define	CHECK_UNLOCK(current, owner)				\
128	cmp	current, owner				;	\
129	je	1f					;	\
130	ALIGN_STACK()					;	\
131	LOAD_STRING_ARG0(2f)				;	\
132	CALL_PANIC()					;	\
133	hlt						;	\
134	.data						;	\
1352:	String	"Mutex unlock attempted from non-owner thread";	\
136	.text						;	\
1371:
138
139#if	MACH_LDEBUG
140/*
141 *  Routines for general lock debugging.
142 */
143
/*
 * Checks for expected lock types and calls "panic" on
 * mismatch.  Detects calls to mutex functions with
 * a simplelock type and vice versa.
 */
149#define	CHECK_MUTEX_TYPE()					\
150	cmpl	$ MUTEX_TAG,M_TYPE			;	\
151	je	1f					;	\
152	ALIGN_STACK()					;	\
153	LOAD_STRING_ARG0(2f)				;	\
154	CALL_PANIC()					;	\
155	hlt						;	\
156	.data						;	\
1572:	String	"not a mutex!"				;	\
158	.text						;	\
1591:
160
/*
 * If one or more simplelocks are currently held by a thread,
 * an attempt to acquire a mutex will cause this check to fail
 * (since acquiring a mutex may context switch, holding a
 * simplelock while doing so is not a good thing).
 */
167#if	MACH_RT
168#define CHECK_PREEMPTION_LEVEL()				\
169	cmpl	$0,%gs:CPU_HIBERNATE			;	\
170	jne	1f					;	\
171	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		;	\
172	je	1f					;	\
173	ALIGN_STACK()					;	\
174	movl	%gs:CPU_PREEMPTION_LEVEL, %eax		;	\
175	LOAD_ARG1(%eax)					;	\
176	LOAD_STRING_ARG0(2f)				;	\
177	CALL_PANIC()					;	\
178	hlt						;	\
179	.data						;	\
1802:	String	"preemption_level(%d) != 0!"		;	\
181	.text						;	\
1821:
183#else	/* MACH_RT */
184#define	CHECK_PREEMPTION_LEVEL()
185#endif	/* MACH_RT */
186
187#define	CHECK_MYLOCK(current, owner)				\
188	cmp	current, owner				;	\
189	jne	1f					;	\
190	ALIGN_STACK()					;	\
191	LOAD_STRING_ARG0(2f)				;	\
192	CALL_PANIC()					;	\
193	hlt						;	\
194	.data						;	\
1952:	String	"Attempt to recursively lock a non-recursive lock";	\
196	.text						;	\
1971:
198
199#else	/* MACH_LDEBUG */
200#define	CHECK_MUTEX_TYPE()
201#define CHECK_PREEMPTION_LEVEL()
#define	CHECK_MYLOCK(current, owner)
203#endif	/* MACH_LDEBUG */
204
205#define PREEMPTION_DISABLE				\
206	incl	%gs:CPU_PREEMPTION_LEVEL
207
208#define	PREEMPTION_LEVEL_DEBUG 1
209#if	PREEMPTION_LEVEL_DEBUG
210#define	PREEMPTION_ENABLE				\
211	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
212	js	17f				;	\
213	jnz	19f				;	\
214	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
215	jz	19f				;	\
216	PUSHF					;	\
217	testl	$EFL_IF, S_PC			;	\
218	jz	18f				;	\
219	POPF					;	\
220	int	$(T_PREEMPT)			;	\
221	jmp	19f				;	\
22217:							\
223	call	_preemption_underflow_panic	;	\
22418:							\
225	POPF					;	\
22619:
227#else
228#define	PREEMPTION_ENABLE				\
229	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
230	jnz	19f				;	\
231	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
232	jz	19f				;	\
233	PUSHF					;	\
234	testl	$EFL_IF, S_PC			;	\
235	jz	18f				;	\
236	POPF					;	\
237	int	$(T_PREEMPT)			;	\
238	jmp	19f				;	\
23918:							\
240	POPF					;	\
24119:
242#endif
243
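/*
 * Illustrative sketch only (not assembled): PREEMPTION_ENABLE above is
 * roughly the following C, assuming hypothetical accessors for the per-CPU
 * fields it touches and a trap stand-in for "int $(T_PREEMPT)":
 *
 *	if (--cpu->preemption_level == 0 &&
 *	    (cpu->pending_ast & AST_URGENT) &&
 *	    interrupts_enabled())
 *		take_preemption_trap();		// service the urgent AST now
 *
 * The EFL_IF test on the PUSHF'd flags guards against raising the trap
 * while interrupts are masked.
 */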
244
245#if	CONFIG_DTRACE
246
247       .globl  _lockstat_probe
248       .globl  _lockstat_probemap
249
/*
 * LOCKSTAT_LABEL creates a DTrace symbol which contains
 * a pointer into the lock code function body. At that
 * point is a "ret" instruction that can be patched into
 * a "nop".
 */
256
257#define        LOCKSTAT_LABEL(lab) \
258       .data                                       ;\
259       .globl  lab                                 ;\
260       lab:                                        ;\
261       .quad 9f                                    ;\
262       .text                                       ;\
263       9:
264
265#define LOCKSTAT_RECORD(id, lck) \
266       push    %rbp                                ;       \
267       mov     %rsp,%rbp                           ;       \
268       movl    _lockstat_probemap + (id * 4)(%rip),%eax ;  \
269       test    %eax,%eax                           ;       \
270       je              9f                          ;       \
271       mov             lck, %rsi                   ;       \
272       mov             %rax, %rdi                  ;       \
273       mov             $0, %rdx                    ;       \
274       mov             $0, %rcx                    ;       \
275       mov             $0, %r8                     ;       \
276       mov             $0, %r9                     ;       \
277       call    *_lockstat_probe(%rip)              ;       \
2789:	leave
279	/* ret - left to subsequent code, e.g. return values */
280
281#endif /* CONFIG_DTRACE */
282
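/*
 * Illustrative sketch only: a lockstat patch point is a "ret" that DTrace
 * rewrites to a "nop" while the probe is enabled, so control falls through
 * into the LOCKSTAT_RECORD sequence.  In C-like pseudocode (names
 * hypothetical):
 *
 *	acquire_fastpath(lck);
 *	return;				// byte patched to a nop when the probe is hot
 *	lockstat_record(probe_id, lck);	// reached only while patched
 *	return;
 */
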
/*
 * For most routines, the hw_lock_t pointer is loaded into a
 * register initially, and then either a byte or register-sized
 * word is loaded from or stored through that pointer.
 */
288
289#define	HW_LOCK_REGISTER	%rdi
290#define	LOAD_HW_LOCK_REGISTER
291#define	HW_LOCK_THREAD_REGISTER	%rcx
292#define	LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
293#define	HW_LOCK_MOV_WORD	movq
294#define	HW_LOCK_EXAM_REGISTER	%rax
295
296/*
297 *	void hw_lock_init(hw_lock_t)
298 *
299 *	Initialize a hardware lock.
300 */
301LEAF_ENTRY(hw_lock_init)
302	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
303	HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)		/* clear the lock */
304	LEAF_RET
305
306
307/*
308 *	void hw_lock_byte_init(volatile uint8_t *)
309 *
310 *	Initialize a hardware byte lock.
311 */
312LEAF_ENTRY(hw_lock_byte_init)
313	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
314	movb $0, (HW_LOCK_REGISTER)		/* clear the lock */
315	LEAF_RET
316
317/*
318 *	void hw_lock_lock(hw_lock_t)
319 *
320 *	Acquire lock, spinning until it becomes available.
321 *	MACH_RT:  also return with preemption disabled.
322 */
323LEAF_ENTRY(hw_lock_lock)
324	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
325	LOAD_HW_LOCK_THREAD_REGISTER	/* get thread pointer */
326
327	PREEMPTION_DISABLE
3281:
329	mov	(HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
330	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER		/* lock locked? */
331	jne	3f			/* branch if so */
332	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
333	jne	3f
334	movl	$1,%eax			/* In case this was a timeout call */
335	LEAF_RET			/* if yes, then nothing left to do */
3363:
337	PAUSE				/* pause for hyper-threading */
338	jmp	1b			/* try again */
339
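/*
 * Illustrative sketch only (not part of the build): hw_lock_lock above is
 * roughly the following C, where atomic_cmpxchg() is a hypothetical
 * stand-in for the lock; cmpxchg pair:
 *
 *	void hw_lock_lock(hw_lock_t lock)
 *	{
 *		thread_t self = current_thread();
 *		disable_preemption();
 *		for (;;) {
 *			if (*lock == 0 && atomic_cmpxchg(lock, 0, self))
 *				return;		// owner recorded in the lock word
 *			cpu_pause();		// "rep; nop" on x86
 *		}
 *	}
 */
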
340/*
341 *	void	hw_lock_byte_lock(uint8_t *lock_byte)
342 *
343 *	Acquire byte sized lock operand, spinning until it becomes available.
344 *	MACH_RT:  also return with preemption disabled.
345 */
346
347LEAF_ENTRY(hw_lock_byte_lock)
348	LOAD_HW_LOCK_REGISTER		/* Load lock pointer */
349	PREEMPTION_DISABLE
350	movl	$1, %ecx		/* Set lock value */
3511:
352	movb	(HW_LOCK_REGISTER), %al		/* Load byte at address */
353	testb	%al,%al			/* lock locked? */
354	jne	3f			/* branch if so */
355	lock; cmpxchg	%cl,(HW_LOCK_REGISTER)	/* attempt atomic compare exchange */
356	jne	3f
357	LEAF_RET			/* if yes, then nothing left to do */
3583:
359	PAUSE				/* pause for hyper-threading */
360	jmp	1b			/* try again */
361
362/*
363 *	unsigned int hw_lock_to(hw_lock_t, unsigned int)
364 *
365 *	Acquire lock, spinning until it becomes available or timeout.
366 *	MACH_RT:  also return with preemption disabled.
367 */
368LEAF_ENTRY(hw_lock_to)
3691:
370	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
371	LOAD_HW_LOCK_THREAD_REGISTER
372
373	/*
374	 * Attempt to grab the lock immediately
375	 * - fastpath without timeout nonsense.
376	 */
377	PREEMPTION_DISABLE
378
379	mov	(HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
380	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER		/* lock locked? */
381	jne	2f			/* branch if so */
382	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
383	jne	2f			/* branch on failure */
384	movl	$1,%eax
385	LEAF_RET
386
3872:
388#define	INNER_LOOP_COUNT	1000
389	/*
390	 * Failed to get the lock so set the timeout
391	 * and then spin re-checking the lock but pausing
392	 * every so many (INNER_LOOP_COUNT) spins to check for timeout.
393	 */
394	push	%r9
395	lfence
396	rdtsc				/* read cyclecount into %edx:%eax */
397	shlq	$32, %rdx
398	orq	%rdx, %rax		/* load 64-bit quantity into %rax */
399	addq	%rax, %rsi		/* %rsi is the timeout expiry */
400
4014:
402	/*
403	 * The inner-loop spin to look for the lock being freed.
404	 */
405	mov	$(INNER_LOOP_COUNT),%r9
4065:
407	PAUSE				/* pause for hyper-threading */
408	mov	(HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER		/* spin checking lock value in cache */
409	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
410	je	6f			/* zero => unlocked, try to grab it */
411	decq	%r9			/* decrement inner loop count */
412	jnz	5b			/* time to check for timeout? */
413
414	/*
415	 * Here after spinning INNER_LOOP_COUNT times, check for timeout
416	 */
417	lfence
418	rdtsc				/* cyclecount into %edx:%eax */
419	shlq	$32, %rdx
420	orq	%rdx, %rax		/* load 64-bit quantity into %rax */
421	cmpq	%rsi, %rax		/* compare to timeout */
	jb	4b			/* continue spinning if not yet expired */
	xor	%rax,%rax		/* timed out: return 0 */
424	pop	%r9
425	LEAF_RET
426
4276:
428	/*
429	 * Here to try to grab the lock that now appears to be free
430	 * after contention.
431	 */
432	LOAD_HW_LOCK_THREAD_REGISTER
433	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
434	jne	4b			/* no - spin again */
435	movl	$1,%eax			/* yes */
436	pop	%r9
437	LEAF_RET
438
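/*
 * Illustrative sketch only: the timed variant above, in C.  rdtsc64() and
 * atomic_cmpxchg() are hypothetical helpers; INNER_LOOP_COUNT and the
 * TSC-based deadline mirror the assembly:
 *
 *	unsigned int hw_lock_to(hw_lock_t lock, uint64_t timeout_ticks)
 *	{
 *		uint64_t deadline = rdtsc64() + timeout_ticks;
 *		do {
 *			for (int i = 0; i < INNER_LOOP_COUNT; i++) {
 *				cpu_pause();
 *				if (*lock == 0 &&
 *				    atomic_cmpxchg(lock, 0, current_thread()))
 *					return 1;	// acquired
 *			}
 *		} while (rdtsc64() < deadline);
 *		return 0;			// timed out, lock not taken
 *	}
 *
 * As in the untimed path, preemption is disabled up front before the first
 * attempt.
 */
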
439/*
440 *	void hw_lock_unlock(hw_lock_t)
441 *
442 *	Unconditionally release lock.
443 *	MACH_RT:  release preemption level.
444 */
445LEAF_ENTRY(hw_lock_unlock)
446	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
447	HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)		/* clear the lock */
448	PREEMPTION_ENABLE
449	LEAF_RET
450
451/*
452 *	void hw_lock_byte_unlock(uint8_t *lock_byte)
453 *
454 *	Unconditionally release byte sized lock operand.
455 *	MACH_RT:  release preemption level.
456 */
457
458LEAF_ENTRY(hw_lock_byte_unlock)
459	LOAD_HW_LOCK_REGISTER		/* Load lock pointer */
460	movb $0, (HW_LOCK_REGISTER)		/* Clear the lock byte */
461	PREEMPTION_ENABLE
462	LEAF_RET
463
464/*
465 *	unsigned int hw_lock_try(hw_lock_t)
466 *	MACH_RT:  returns with preemption disabled on success.
467 */
468LEAF_ENTRY(hw_lock_try)
469	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
470	LOAD_HW_LOCK_THREAD_REGISTER
471	PREEMPTION_DISABLE
472
473	mov	(HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER
474	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
475	jne	1f
476	lock; cmpxchg	HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)	/* try to acquire the HW lock */
477	jne	1f
478
479	movl	$1,%eax			/* success */
480	LEAF_RET
481
4821:
483	PREEMPTION_ENABLE		/* failure:  release preemption... */
484	xorl	%eax,%eax		/* ...and return failure */
485	LEAF_RET
486
487/*
488 *	unsigned int hw_lock_held(hw_lock_t)
489 *	MACH_RT:  doesn't change preemption state.
490 *	N.B.  Racy, of course.
491 */
492LEAF_ENTRY(hw_lock_held)
493	LOAD_HW_LOCK_REGISTER		/* fetch lock pointer */
494	mov	(HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER		/* check lock value */
495	test	HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
496	movl	$1,%ecx
497	cmovne	%ecx,%eax		/* 0 => unlocked, 1 => locked */
498	LEAF_RET
499
500
/*
 * Reader-writer lock fastpaths. These currently exist for the
 * shared lock acquire, the exclusive lock acquire, the shared to
 * exclusive upgrade and the release paths (where they reduce overhead
 * considerably); these are by far the most frequently used routines.
 *
 * The following should reflect the layout of the bitfield embedded within
 * the lck_rw_t structure (see i386/locks.h).
 */
510#define LCK_RW_INTERLOCK	(0x1 << 16)
511
512#define LCK_RW_PRIV_EXCL	(0x1 << 24)
513#define LCK_RW_WANT_UPGRADE	(0x2 << 24)
514#define LCK_RW_WANT_WRITE	(0x4 << 24)
515#define LCK_R_WAITING		(0x8 << 24)
516#define LCK_W_WAITING		(0x10 << 24)
517
518#define LCK_RW_SHARED_MASK	(0xffff)
519
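/*
 * Illustrative sketch of the 32-bit state word the masks above select from
 * (the authoritative definition is the bitfield in i386/locks.h; the struct
 * below is hypothetical and for reference only):
 *
 *	typedef struct {
 *		uint32_t shared_count : 16;	// LCK_RW_SHARED_MASK
 *		uint32_t interlock    : 1;	// LCK_RW_INTERLOCK (bit 16)
 *		uint32_t              : 7;	// unused here
 *		uint32_t priv_excl    : 1;	// LCK_RW_PRIV_EXCL  (bit 24)
 *		uint32_t want_upgrade : 1;	// LCK_RW_WANT_UPGRADE
 *		uint32_t want_write   : 1;	// LCK_RW_WANT_WRITE
 *		uint32_t r_waiting    : 1;	// LCK_R_WAITING
 *		uint32_t w_waiting    : 1;	// LCK_W_WAITING
 *	} rw_lock_state_sketch_t;
 */
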
/*
 * For most routines, the lck_rw_t pointer is loaded into a
 * register initially, and the flags bitfield is loaded into another
 * register and examined.
 */
525
526#define	LCK_RW_REGISTER	%rdi
527#define	LOAD_LCK_RW_REGISTER
528#define	LCK_RW_FLAGS_REGISTER	%eax
529#define	LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
530
531#define	RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
532/*
533 *	void lck_rw_lock_shared(lck_rw_t *)
534 *
535 */
536Entry(lck_rw_lock_shared)
537	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
538	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count before atomic CAS */
539	LOAD_LCK_RW_REGISTER
5401:
541	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield and interlock */
542	testl	$(RW_LOCK_SHARED_MASK), %eax	/* Eligible for fastpath? */
543	jne	3f
544
545	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
546	incl	%ecx				/* Increment reader refcount */
547	lock
548	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
549	jne	2f
550
551#if	CONFIG_DTRACE
552	/*
553	 * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_ACQUIRE
554	 * Implemented by swapping between return and no-op instructions.
555	 * See bsd/dev/dtrace/lockstat.c.
556	 */
557	LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
558	ret
559	/*
560	Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER
561	*/
562	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
563#endif
564	ret
5652:
566	PAUSE
567	jmp	1b
5683:
569	jmp	EXT(lck_rw_lock_shared_gen)
570
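/*
 * Illustrative sketch only: the fastpath above, in C.  atomic_cmpxchg() is
 * a hypothetical stand-in for the lock; cmpxchgl pair and "data" for the
 * state word:
 *
 *	current_thread()->rwlock_count++;	// counted before the CAS
 *	for (;;) {
 *		uint32_t old = lck->data;
 *		if (old & (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE))
 *			return lck_rw_lock_shared_gen(lck);	// slow path
 *		if (atomic_cmpxchg(&lck->data, old, old + 1))	// bump reader count
 *			return;					// shared hold acquired
 *	}
 */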
571
572
573#define	RW_TRY_LOCK_SHARED_MASK (LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
574/*
575 *	void lck_rw_try_lock_shared(lck_rw_t *)
576 *
577 */
578Entry(lck_rw_try_lock_shared)
579	LOAD_LCK_RW_REGISTER
5801:
581	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield and interlock */
582	testl	$(LCK_RW_INTERLOCK), %eax
583	jne	2f
584	testl	$(RW_TRY_LOCK_SHARED_MASK), %eax
585	jne	3f			/* lock is busy */
586
587	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
588	incl	%ecx				/* Increment reader refcount */
589	lock
590	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
591	jne	2f
592
593	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
594	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count on success. */
595	/* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
596
597#if	CONFIG_DTRACE
598	movl	$1, %eax
599	/*
600	 * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
601	 * Implemented by swapping between return and no-op instructions.
602	 * See bsd/dev/dtrace/lockstat.c.
603	 */
604	LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
605	ret
606	/* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
607	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
608#endif
609	movl	$1, %eax			/* return TRUE */
610	ret
6112:
612	PAUSE
613	jmp	1b
6143:
615	xorl	%eax, %eax
616	ret
617
618
619#define	RW_LOCK_EXCLUSIVE_HELD	(LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)
620/*
621 *	int lck_rw_grab_shared(lck_rw_t *)
622 *
623 */
624Entry(lck_rw_grab_shared)
625	LOAD_LCK_RW_REGISTER
6261:
627	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield and interlock */
628	testl	$(LCK_RW_INTERLOCK), %eax
629	jne	5f
630	testl	$(RW_LOCK_EXCLUSIVE_HELD), %eax
631	jne	3f
6322:
633	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
634	incl	%ecx				/* Increment reader refcount */
635	lock
636	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
637	jne	4f
638
639	movl	$1, %eax			/* return success */
640	ret
6413:
642	testl	$(LCK_RW_SHARED_MASK), %eax
643	je	4f
644	testl	$(LCK_RW_PRIV_EXCL), %eax
645	je	2b
6464:
647	xorl	%eax, %eax			/* return failure */
648	ret
6495:
650	PAUSE
651	jmp	1b
652
653
654
655#define	RW_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | \
656	                        LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
657/*
658 *	void lck_rw_lock_exclusive(lck_rw_t*)
659 *
660 */
661Entry(lck_rw_lock_exclusive)
662	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
663	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count before atomic CAS */
664	LOAD_LCK_RW_REGISTER
6651:
666	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and shared count */
667	testl	$(RW_LOCK_EXCLUSIVE_MASK), %eax		/* Eligible for fastpath? */
668	jne	3f					/* no, go slow */
669
670	movl	%eax, %ecx				/* original value in %eax for cmpxchgl */
671	orl	$(LCK_RW_WANT_WRITE), %ecx
672	lock
673	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
674	jne	2f
675
676#if	CONFIG_DTRACE
677	/*
678	 * Dtrace lockstat event: LS_LCK_RW_LOCK_EXCL_ACQUIRE
679	 * Implemented by swapping between return and no-op instructions.
680	 * See bsd/dev/dtrace/lockstat.c.
681	 */
682	LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
683	ret
684	/* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
685	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
686#endif
687	ret
6882:
689	PAUSE
690	jmp	1b
6913:
692	jmp	EXT(lck_rw_lock_exclusive_gen)
693
694
695
696#define	RW_TRY_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
697/*
698 *	void lck_rw_try_lock_exclusive(lck_rw_t *)
699 *
700 *		Tries to get a write lock.
701 *
702 *		Returns FALSE if the lock is not held on return.
703 */
704Entry(lck_rw_try_lock_exclusive)
705	LOAD_LCK_RW_REGISTER
7061:
707	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and shared count */
708	testl	$(LCK_RW_INTERLOCK), %eax
709	jne	2f
710	testl	$(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax
711	jne	3f					/* can't get it */
712
713	movl	%eax, %ecx				/* original value in %eax for cmpxchgl */
714	orl	$(LCK_RW_WANT_WRITE), %ecx
715	lock
716	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
717	jne	2f
718
719	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
720	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count on success. */
721	/* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
722
723#if	CONFIG_DTRACE
724	movl	$1, %eax
725	/*
726	 * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
727	 * Implemented by swapping between return and no-op instructions.
728	 * See bsd/dev/dtrace/lockstat.c.
729	 */
730	LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
731	ret
732	/* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
733	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
734#endif
735	movl	$1, %eax			/* return TRUE */
736	ret
7372:
738	PAUSE
739	jmp	1b
7403:
741	xorl	%eax, %eax			/* return FALSE */
742	ret
743
744
745
/*
 *	void lck_rw_lock_shared_to_exclusive(lck_rw_t*)
 *
 *	The fastpath can be taken if
 *	the current rw_shared_count == 1
 *	AND the interlock is clear
 *	AND RW_WANT_UPGRADE is not set.
 *
 *	Note that RW_WANT_WRITE could be set, but it will not
 *	be indicative of an exclusive hold since we have
 *	a read count on the lock that we have not yet released;
 *	we can blow by that state since the lck_rw_lock_exclusive
 *	function will block until rw_shared_count == 0 and
 *	RW_WANT_UPGRADE is clear... it does this check behind
 *	the interlock, which we are also checking here.
 *
 *	To make the transition we must be able to atomically
 *	set RW_WANT_UPGRADE and get rid of the read count we hold.
 */
765Entry(lck_rw_lock_shared_to_exclusive)
766	LOAD_LCK_RW_REGISTER
7671:
768	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and shared count */
769	testl	$(LCK_RW_INTERLOCK), %eax
770	jne	7f
771	testl	$(LCK_RW_WANT_UPGRADE), %eax
772	jne	2f
773
774	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
775	orl	$(LCK_RW_WANT_UPGRADE), %ecx	/* ask for WANT_UPGRADE */
776	decl	%ecx				/* and shed our read count */
777	lock
778	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
779	jne	7f
780						/* we now own the WANT_UPGRADE */
781	testl	$(LCK_RW_SHARED_MASK), %ecx	/* check to see if all of the readers are drained */
782	jne	8f				/* if not, we need to go wait */
783
784#if	CONFIG_DTRACE
785	movl	$1, %eax
786	/*
787	 * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
788	 * Implemented by swapping between return and no-op instructions.
789	 * See bsd/dev/dtrace/lockstat.c.
790	 */
791	LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
792	ret
793    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
794    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, LCK_RW_REGISTER)
795#endif
796	movl	$1, %eax			/* return success */
797	ret
798
7992:						/* someone else already holds WANT_UPGRADE */
800	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
801	decl	%ecx				/* shed our read count */
802	testl	$(LCK_RW_SHARED_MASK), %ecx
	jne	3f				/* other readers still outstanding */
	andl	$(~LCK_W_WAITING), %ecx		/* we were the last reader, so clear the wait indicator */
8053:
806	lock
807	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
808	jne	7f
809
810	mov	%eax, %esi			/* put old flags as second arg */
						/* lock is already in %rdi */
812	call	EXT(lck_rw_lock_shared_to_exclusive_failure)
813	ret					/* and pass the failure return along */
8147:
815	PAUSE
816	jmp	1b
8178:
818	jmp	EXT(lck_rw_lock_shared_to_exclusive_success)
819
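/*
 * Illustrative sketch only: the upgrade above, in C.  atomic_cmpxchg() and
 * "data" are hypothetical stand-ins; the _success/_failure helpers are the
 * routines actually called:
 *
 *	for (;;) {
 *		uint32_t old = lck->data, new;
 *		if (old & LCK_RW_INTERLOCK)
 *			continue;			// spin while interlocked
 *		if (!(old & LCK_RW_WANT_UPGRADE)) {
 *			new = (old | LCK_RW_WANT_UPGRADE) - 1;	// claim upgrade, drop our read ref
 *			if (!atomic_cmpxchg(&lck->data, old, new))
 *				continue;
 *			return (new & LCK_RW_SHARED_MASK)
 *			    ? lck_rw_lock_shared_to_exclusive_success(lck)	// wait for readers to drain
 *			    : TRUE;
 *		}
 *		new = old - 1;			// lost the race: shed our read ref
 *		if ((new & LCK_RW_SHARED_MASK) == 0)
 *			new &= ~LCK_W_WAITING;
 *		if (atomic_cmpxchg(&lck->data, old, new))
 *			return lck_rw_lock_shared_to_exclusive_failure(lck, old);
 *	}
 */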
820
821
822	.cstring
823rwl_release_error_str:
824	.asciz  "Releasing non-exclusive RW lock without a reader refcount!"
825	.text
826
827/*
828 *	lck_rw_type_t lck_rw_done(lck_rw_t *)
829 *
830 */
831Entry(lck_rw_done)
832	LOAD_LCK_RW_REGISTER
8331:
834	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
835	testl   $(LCK_RW_INTERLOCK), %eax
836	jne     7f				/* wait for interlock to clear */
837
838	movl	%eax, %ecx			/* keep original value in %eax for cmpxchgl */
839	testl	$(LCK_RW_SHARED_MASK), %ecx	/* if reader count == 0, must be exclusive lock */
840	je	2f
841	decl	%ecx				/* Decrement reader count */
842	testl	$(LCK_RW_SHARED_MASK), %ecx	/* if reader count has now gone to 0, check for waiters */
843	je	4f
844	jmp	6f
8452:
846	testl	$(LCK_RW_WANT_UPGRADE), %ecx
847	je	3f
848	andl	$(~LCK_RW_WANT_UPGRADE), %ecx
849	jmp	4f
8503:
851	testl	$(LCK_RW_WANT_WRITE), %ecx
852	je	8f				/* lock is not 'owned', go panic */
853	andl	$(~LCK_RW_WANT_WRITE), %ecx
8544:
855	/*
856	 * test the original values to match what
857	 * lck_rw_done_gen is going to do to determine
858	 * which wakeups need to happen...
859	 *
860	 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
861	 */
862	testl	$(LCK_W_WAITING), %eax
863	je	5f
864	andl	$(~LCK_W_WAITING), %ecx
865
866	testl	$(LCK_RW_PRIV_EXCL), %eax
867	jne	6f
8685:
869	andl	$(~LCK_R_WAITING), %ecx
8706:
871	lock
872	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
873	jne	7f
874
875	mov	%eax,%esi	/* old flags in %rsi */
876				/* lock is in %rdi already */
877	call	EXT(lck_rw_done_gen)
878	ret
8797:
880	PAUSE
881	jmp	1b
8828:
883	ALIGN_STACK()
884	LOAD_STRING_ARG0(rwl_release_error_str)
885	CALL_PANIC()
886
887
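/*
 * Illustrative sketch only: lck_rw_done above computes, with a single CAS,
 * the state that lck_rw_done_gen() will be handed.  Roughly (C-like, field
 * names hypothetical):
 *
 *	if (old & LCK_RW_SHARED_MASK)
 *		new = old - 1;				// drop one read ref
 *	else if (old & LCK_RW_WANT_UPGRADE)
 *		new = old & ~LCK_RW_WANT_UPGRADE;	// release an upgrade hold
 *	else if (old & LCK_RW_WANT_WRITE)
 *		new = old & ~LCK_RW_WANT_WRITE;		// release a write hold
 *	else
 *		panic(rwl_release_error_str);
 *
 * When no readers remain, the waiter bits that lck_rw_done_gen() is about to
 * wake are also cleared (both of them, unless priv_excl defers the readers
 * behind a waiting writer).
 */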
888
889/*
890 *	lck_rw_type_t lck_rw_lock_exclusive_to_shared(lck_rw_t *)
891 *
892 */
893Entry(lck_rw_lock_exclusive_to_shared)
894	LOAD_LCK_RW_REGISTER
8951:
896	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
897	testl   $(LCK_RW_INTERLOCK), %eax
898	jne     6f				/* wait for interlock to clear */
899
900	movl	%eax, %ecx			/* keep original value in %eax for cmpxchgl */
901	incl	%ecx				/* Increment reader count */
902
903	testl	$(LCK_RW_WANT_UPGRADE), %ecx
904	je	2f
905	andl	$(~LCK_RW_WANT_UPGRADE), %ecx
906	jmp	3f
9072:
908	andl	$(~LCK_RW_WANT_WRITE), %ecx
9093:
910	/*
911	 * test the original values to match what
912	 * lck_rw_lock_exclusive_to_shared_gen is going to do to determine
913	 * which wakeups need to happen...
914	 *
915	 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
916	 */
917	testl	$(LCK_W_WAITING), %eax
918	je	4f
919	testl	$(LCK_RW_PRIV_EXCL), %eax
920	jne	5f
9214:
922	andl	$(~LCK_R_WAITING), %ecx
9235:
924	lock
925	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
926	jne	6f
927
928	mov	%eax,%esi
929	call	EXT(lck_rw_lock_exclusive_to_shared_gen)
930	ret
9316:
932	PAUSE
933	jmp	1b
934
935
936
937/*
938 *	int lck_rw_grab_want(lck_rw_t *)
939 *
940 */
941Entry(lck_rw_grab_want)
942	LOAD_LCK_RW_REGISTER
9431:
944	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
945	testl   $(LCK_RW_INTERLOCK), %eax
946	jne     3f				/* wait for interlock to clear */
947	testl	$(LCK_RW_WANT_WRITE), %eax	/* want_write has been grabbed by someone else */
948	jne	2f				/* go return failure */
949
950	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
951	orl	$(LCK_RW_WANT_WRITE), %ecx
952	lock
953	cmpxchgl %ecx, (LCK_RW_REGISTER)			/* Attempt atomic exchange */
954	jne	2f
955						/* we now own want_write */
956	movl	$1, %eax			/* return success */
957	ret
9582:
959	xorl	%eax, %eax			/* return failure */
960	ret
9613:
962	PAUSE
963	jmp	1b
964
965
966#define	RW_LOCK_SHARED_OR_UPGRADE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)
967/*
968 *	int lck_rw_held_read_or_upgrade(lck_rw_t *)
969 *
970 */
971Entry(lck_rw_held_read_or_upgrade)
972	LOAD_LCK_RW_REGISTER
973	LOAD_LCK_RW_FLAGS_REGISTER		/* Load state bitfield, interlock and reader count */
974	andl	$(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax
975	ret
976
977
978
/*
 * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
 * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
 * as a 64-bit quantity (this matches the existing PowerPC implementation;
 * the new x86-specific statistics are maintained as 32-bit quantities).
 *
 *
 * Enable this preprocessor define to record only the first miss.
 * By default, we count every miss, hence multiple misses may be
 * recorded for a single lock acquire attempt via lck_mtx_lock.
 */
991#undef LOG_FIRST_MISS_ALONE
992
/*
 * This preprocessor define controls whether the R-M-W updates of the
 * per-group statistics elements are atomic (LOCK-prefixed).
 * Enabled by default.
 */
998#define ATOMIC_STAT_UPDATES 1
999
1000#if defined(ATOMIC_STAT_UPDATES)
1001#define LOCK_IF_ATOMIC_STAT_UPDATES lock
1002#else
1003#define LOCK_IF_ATOMIC_STAT_UPDATES
1004#endif /* ATOMIC_STAT_UPDATES */
1005
1006
/*
 * For most routines, the lck_mtx_t pointer is loaded into a
 * register initially, and the state field checked for indirection.
 * Eventually the lock owner is loaded into a register and examined.
 */
1012
1013#define M_OWNER		MUTEX_OWNER
1014#define M_PTR		MUTEX_PTR
1015#define M_STATE		MUTEX_STATE
1016
1017#define LMTX_ARG0	%rdi
1018#define LMTX_ARG1	%rsi
1019#define LMTX_REG_ORIG	%rdi
1020#define	LMTX_REG	%rdx
1021#define LMTX_A_REG	%rax
1022#define LMTX_A_REG32	%eax
1023#define LMTX_C_REG	%rcx
1024#define LMTX_C_REG32	%ecx
1025#define LMTX_RET_REG	%rax
1026#define LMTX_RET_REG32	%eax
1027#define LMTX_LGROUP_REG	%r10
1028#define LMTX_SSTATE_REG	%r11
1029#define	LOAD_LMTX_REG(arg)	mov %rdi, %rdx
1030#define LMTX_CHK_EXTENDED	cmp LMTX_REG, LMTX_REG_ORIG
1031#define LMTX_ASSERT_OWNED	cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1
1032
1033#define LMTX_ENTER_EXTENDED					\
1034	mov	M_PTR(LMTX_REG), LMTX_REG 		;	\
1035	xor	LMTX_SSTATE_REG, LMTX_SSTATE_REG	;	\
1036	mov	MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG 	;	\
1037	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1038	incq	GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)
1039
1040#define LMTX_EXIT_EXTENDED
1041
1042#define	LMTX_CHK_EXTENDED_EXIT
1043
1044
1045#if	LOG_FIRST_MISS_ALONE
1046#define LMTX_UPDATE_MISS					\
1047	test	$1, LMTX_SSTATE_REG 			;	\
1048	jnz	11f					;	\
1049	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1050	incl	GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)	;	\
1051	or	$1, LMTX_SSTATE_REG			;	\
105211:
1053#else
1054#define LMTX_UPDATE_MISS					\
1055	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1056	incl	GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
1057#endif
1058
1059
1060#if	LOG_FIRST_MISS_ALONE
1061#define LMTX_UPDATE_WAIT					\
1062	test	$2, LMTX_SSTATE_REG 			;	\
1063	jnz	11f					;	\
1064	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1065	incl	GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)	;	\
1066	or	$2, LMTX_SSTATE_REG			;	\
106711:
1068#else
1069#define LMTX_UPDATE_WAIT					\
1070	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1071	incl	GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
1072#endif
1073
1074
/*
 * Record the "direct wait" statistic, which indicates if a
 * miss proceeded to block directly without spinning; this occurs
 * if the owner of the mutex isn't running on another processor
 * at the time of the check.
 */
1081#define LMTX_UPDATE_DIRECT_WAIT					\
1082	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
1083	incl	GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
1084
1085
1086#define LMTX_CALLEXT1(func_name)		\
1087	LMTX_CHK_EXTENDED		;	\
1088	je	12f			;	\
1089	push	LMTX_LGROUP_REG		;	\
1090	push	LMTX_SSTATE_REG		;	\
109112:	push	LMTX_REG_ORIG		;	\
1092	push	LMTX_REG		;	\
1093	mov	LMTX_REG, LMTX_ARG0	;	\
1094	call	EXT(func_name)		;	\
1095	pop	LMTX_REG		;	\
1096	pop	LMTX_REG_ORIG		;	\
1097	LMTX_CHK_EXTENDED		;	\
1098	je	12f			;	\
1099	pop	LMTX_SSTATE_REG		;	\
1100	pop	LMTX_LGROUP_REG		;	\
110112:
1102
1103#define LMTX_CALLEXT2(func_name, reg)		\
1104	LMTX_CHK_EXTENDED		;	\
1105	je	12f			;	\
1106	push	LMTX_LGROUP_REG		;	\
1107	push	LMTX_SSTATE_REG		;	\
110812:	push	LMTX_REG_ORIG		;	\
1109	push	LMTX_REG		;	\
1110	mov	reg, LMTX_ARG1		;	\
1111	mov	LMTX_REG, LMTX_ARG0	;	\
1112	call	EXT(func_name)		;	\
1113	pop	LMTX_REG		;	\
1114	pop	LMTX_REG_ORIG		;	\
1115	LMTX_CHK_EXTENDED		;	\
1116	je	12f			;	\
1117	pop	LMTX_SSTATE_REG		;	\
1118	pop	LMTX_LGROUP_REG		;	\
111912:
1120
1121
1122#define M_WAITERS_MSK		0x0000ffff
1123#define M_PRIORITY_MSK		0x00ff0000
1124#define M_ILOCKED_MSK		0x01000000
1125#define M_MLOCKED_MSK		0x02000000
1126#define M_PROMOTED_MSK		0x04000000
1127#define M_SPIN_MSK		0x08000000
1128
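/*
 * Illustrative sketch of the 32-bit M_STATE word the masks above select from
 * (the authoritative layout is in i386/locks.h; this struct is hypothetical):
 *
 *	typedef struct {
 *		uint32_t waiters  : 16;		// M_WAITERS_MSK
 *		uint32_t priority : 8;		// M_PRIORITY_MSK
 *		uint32_t ilocked  : 1;		// M_ILOCKED_MSK  (interlock held)
 *		uint32_t mlocked  : 1;		// M_MLOCKED_MSK  (held as a full mutex)
 *		uint32_t promoted : 1;		// M_PROMOTED_MSK
 *		uint32_t spin     : 1;		// M_SPIN_MSK     (held as a spinlock)
 *		uint32_t          : 4;
 *	} mtx_state_sketch_t;
 *
 * MUTEX_IND and MUTEX_DESTROYED are whole-word sentinel values stored in
 * M_STATE for indirect and destroyed mutexes respectively.
 */
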
/*
 *	void lck_mtx_assert(lck_mtx_t* l, unsigned int)
 *	Takes the address of a lock and an assertion type as parameters.
 *	The assertion can take one of two forms determined by the type
 *	parameter: either the lock is held by the current thread, and the
 *	type is LCK_MTX_ASSERT_OWNED, or it isn't and the type is
 *	LCK_MTX_ASSERT_NOTOWNED. Calls panic on assertion failure.
 *
 */
1138
1139NONLEAF_ENTRY(lck_mtx_assert)
1140        LOAD_LMTX_REG(B_ARG0)	                   	/* Load lock address */
1141	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG	/* Load current thread */
1142
1143	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1144	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1145	jne	0f
1146	mov	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
11470:
1148	mov	M_OWNER(LMTX_REG), LMTX_C_REG	/* Load owner */
1149	LMTX_ASSERT_OWNED
1150	jne	2f				/* Assert ownership? */
1151	cmp	LMTX_A_REG, LMTX_C_REG		/* Current thread match? */
1152	jne	3f				/* no, go panic */
1153	testl	$(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG)
1154	je	3f
11551:						/* yes, we own it */
1156	NONLEAF_RET
11572:
1158	cmp	LMTX_A_REG, LMTX_C_REG		/* Current thread match? */
1159	jne	1b				/* No, return */
1160	ALIGN_STACK()
1161	LOAD_PTR_ARG1(LMTX_REG)
1162	LOAD_STRING_ARG0(mutex_assert_owned_str)
1163	jmp	4f
11643:
1165	ALIGN_STACK()
1166	LOAD_PTR_ARG1(LMTX_REG)
1167	LOAD_STRING_ARG0(mutex_assert_not_owned_str)
11684:
1169	CALL_PANIC()
1170
1171
1172lck_mtx_destroyed:
1173	ALIGN_STACK()
1174	LOAD_PTR_ARG1(LMTX_REG)
1175	LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
1176	CALL_PANIC()
1177
1178
1179.data
1180mutex_assert_not_owned_str:
1181	.asciz	"mutex (%p) not owned\n"
1182mutex_assert_owned_str:
1183	.asciz	"mutex (%p) owned\n"
1184mutex_interlock_destroyed_str:
1185	.asciz	"trying to interlock destroyed mutex (%p)"
1186.text
1187
1188
1189
1190/*
1191 * lck_mtx_lock()
1192 * lck_mtx_try_lock()
1193 * lck_mtx_unlock()
1194 * lck_mtx_lock_spin()
1195 * lck_mtx_lock_spin_always()
1196 * lck_mtx_try_lock_spin()
1197 * lck_mtx_try_lock_spin_always()
1198 * lck_mtx_convert_spin()
1199 */
1200NONLEAF_ENTRY(lck_mtx_lock_spin_always)
1201	LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
1202	jmp     Llmls_avoid_check
1203
1204NONLEAF_ENTRY(lck_mtx_lock_spin)
1205	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
1206
1207	CHECK_PREEMPTION_LEVEL()
1208Llmls_avoid_check:
1209	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1210	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1211	jnz	Llmls_slow
1212Llmls_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1213	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1214	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
1215
1216	PREEMPTION_DISABLE
1217	lock
1218	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1219	jne	Llmls_busy_disabled
1220
1221 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1222	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of interlock */
1223#if	MACH_LDEBUG
1224	test	LMTX_A_REG, LMTX_A_REG
1225	jz	1f
1226	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
12271:
1228#endif	/* MACH_LDEBUG */
1229
1230	LMTX_CHK_EXTENDED_EXIT
1231	/* return with the interlock held and preemption disabled */
1232	leave
1233#if	CONFIG_DTRACE
1234	LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point)
1235	ret
1236	/* inherit lock pointer in LMTX_REG above */
1237	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG)
1238#endif
1239	ret
1240
1241Llmls_slow:
1242	test	$M_ILOCKED_MSK, LMTX_C_REG32		/* is the interlock held */
1243	jz	Llml_contended				/* no, must have been the mutex */
1244
	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
1246	je	lck_mtx_destroyed
1247	cmp	$(MUTEX_IND), LMTX_C_REG32		/* Is this an indirect mutex */
1248	jne	Llmls_loop				/* no... must be interlocked */
1249
1250	LMTX_ENTER_EXTENDED
1251
1252	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1253	test	$(M_SPIN_MSK), LMTX_C_REG32
1254	jz	Llmls_loop1
1255
1256	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1257Llmls_loop:
1258	PAUSE
1259	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1260Llmls_loop1:
1261	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1262	jz	Llmls_try
1263	test	$(M_MLOCKED_MSK), LMTX_C_REG32
1264	jnz	Llml_contended				/* mutex owned by someone else, go contend for it */
1265	jmp	Llmls_loop
1266
1267Llmls_busy_disabled:
1268	PREEMPTION_ENABLE
1269	jmp	Llmls_loop
1270
1271
1272
1273NONLEAF_ENTRY(lck_mtx_lock)
1274	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
1275
1276	CHECK_PREEMPTION_LEVEL()
1277
1278	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1279	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1280	jnz	Llml_slow
1281Llml_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1282	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1283	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1284
1285	PREEMPTION_DISABLE
1286	lock
1287	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1288	jne	Llml_busy_disabled
1289
1290 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1291	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1292#if	MACH_LDEBUG
1293	test	LMTX_A_REG, LMTX_A_REG
1294	jz	1f
1295	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
12961:
1297#endif	/* MACH_LDEBUG */
1298
1299	testl	$(M_WAITERS_MSK), M_STATE(LMTX_REG)
1300	jz	Llml_finish
1301
1302	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
1303
1304Llml_finish:
1305	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
1306	PREEMPTION_ENABLE
1307
1308	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1309	jne	2f
1310
1311	leave
1312#if	CONFIG_DTRACE
1313	LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
1314	ret
1315	/* inherit lock pointer in LMTX_REG above */
1316	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG)
1317#endif
1318	ret
13192:
1320	LMTX_EXIT_EXTENDED
1321	leave
1322#if	CONFIG_DTRACE
1323	LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point)
1324	ret
1325	/* inherit lock pointer in LMTX_REG above */
1326	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG)
1327#endif
1328	ret
1329
1330
1331Llml_slow:
1332	test	$M_ILOCKED_MSK, LMTX_C_REG32		/* is the interlock held */
1333	jz	Llml_contended				/* no, must have been the mutex */
1334
	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
1336	je	lck_mtx_destroyed
1337	cmp	$(MUTEX_IND), LMTX_C_REG32		/* Is this an indirect mutex? */
1338	jne	Llml_loop				/* no... must be interlocked */
1339
1340	LMTX_ENTER_EXTENDED
1341
1342	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1343	test	$(M_SPIN_MSK), LMTX_C_REG32
1344	jz	Llml_loop1
1345
1346	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1347Llml_loop:
1348	PAUSE
1349	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1350Llml_loop1:
1351	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1352	jz	Llml_try
1353	test	$(M_MLOCKED_MSK), LMTX_C_REG32
1354	jnz	Llml_contended				/* mutex owned by someone else, go contend for it */
1355	jmp	Llml_loop
1356
1357Llml_busy_disabled:
1358	PREEMPTION_ENABLE
1359	jmp	Llml_loop
1360
1361
1362Llml_contended:
1363	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1364	je	0f
1365	LMTX_UPDATE_MISS
13660:
1367	LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)
1368
1369	test	LMTX_RET_REG, LMTX_RET_REG
1370	jz	Llml_acquired		/* acquired mutex, interlock held and preemption disabled */
1371
1372	cmp	$1, LMTX_RET_REG	/* check for direct wait status */
1373	je	2f
1374	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1375	je	2f
1376	LMTX_UPDATE_DIRECT_WAIT
13772:
1378	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1379	test	$(M_ILOCKED_MSK), LMTX_C_REG32
1380	jnz	6f
1381
1382	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1383	or	$(M_ILOCKED_MSK), LMTX_C_REG32	/* try to take the interlock */
1384
1385	PREEMPTION_DISABLE
1386	lock
1387	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1388	jne	5f
1389
1390	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* we've got the interlock and */
1391	jnz	3f
1392	or	$(M_MLOCKED_MSK), LMTX_C_REG32	/* the mutex is free... grab it directly */
1393	mov	LMTX_C_REG32, M_STATE(LMTX_REG)
1394
1395 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1396	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1397#if	MACH_LDEBUG
1398	test	LMTX_A_REG, LMTX_A_REG
1399	jz	1f
1400	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
14011:
1402#endif	/* MACH_LDEBUG */
1403
1404Llml_acquired:
1405	testl	$(M_WAITERS_MSK), M_STATE(LMTX_REG)
1406	jnz	1f
1407	mov	M_OWNER(LMTX_REG), LMTX_A_REG
1408	mov	TH_WAS_PROMOTED_ON_WAKEUP(LMTX_A_REG), LMTX_A_REG32
1409	test	LMTX_A_REG32, LMTX_A_REG32
1410	jz	Llml_finish
14111:
1412	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
1413	jmp	Llml_finish
1414
14153:					/* interlock held, mutex busy */
1416	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1417	je	4f
1418	LMTX_UPDATE_WAIT
14194:
1420	LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
1421	jmp	Llml_contended
14225:
1423	PREEMPTION_ENABLE
14246:
1425	PAUSE
1426	jmp	2b
1427
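/*
 * Illustrative sketch only: the lck_mtx_lock fastpath above, in C.
 * atomic_cmpxchg(), "state" and "owner" are hypothetical stand-ins for the
 * lock; cmpxchg on M_STATE and the M_OWNER store:
 *
 *	uint32_t old = mtx->state;
 *	if (!(old & (M_ILOCKED_MSK | M_MLOCKED_MSK))) {
 *		disable_preemption();
 *		if (atomic_cmpxchg(&mtx->state, old,
 *		    old | M_ILOCKED_MSK | M_MLOCKED_MSK)) {
 *			mtx->owner = current_thread();
 *			if (mtx->state & M_WAITERS_MSK)
 *				lck_mtx_lock_acquire_x86(mtx);	// pick up any promotion
 *			mtx->state &= ~M_ILOCKED_MSK;	// drop interlock, keep mutex bit
 *			enable_preemption();
 *			return;
 *		}
 *		enable_preemption();
 *	}
 *	// slow path: indirect/destroyed checks, adaptive spin via
 *	// lck_mtx_lock_spinwait_x86(), then block in lck_mtx_lock_wait_x86()
 */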
1428
1429NONLEAF_ENTRY(lck_mtx_try_lock_spin_always)
1430	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1431	jmp     Llmts_avoid_check
1432
1433NONLEAF_ENTRY(lck_mtx_try_lock_spin)
1434	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1435
1436Llmts_avoid_check:
1437	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1438	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1439	jnz	Llmts_slow
1440Llmts_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1441	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1442	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
1443
1444	PREEMPTION_DISABLE
1445	lock
1446	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1447	jne	Llmts_busy_disabled
1448
1449 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1450	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1451#if	MACH_LDEBUG
1452	test	LMTX_A_REG, LMTX_A_REG
1453	jz	1f
1454	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
14551:
1456#endif	/* MACH_LDEBUG */
1457
1458	LMTX_CHK_EXTENDED_EXIT
1459	leave
1460
1461#if	CONFIG_DTRACE
1462	mov	$1, LMTX_RET_REG	/* return success */
1463	LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point)
1464	ret
1465	/* inherit lock pointer in LMTX_REG above */
1466	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG)
1467#endif
1468	mov	$1, LMTX_RET_REG	/* return success */
1469	ret
1470
1471Llmts_slow:
1472	test	$(M_ILOCKED_MSK), LMTX_C_REG32	/* is the interlock held */
1473	jz	Llmts_fail			/* no, must be held as a mutex */
1474
	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
1476	je	lck_mtx_destroyed
1477	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1478	jne	Llmts_loop1
1479
1480	LMTX_ENTER_EXTENDED
1481Llmts_loop:
1482	PAUSE
1483	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1484Llmts_loop1:
1485	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
1486	jnz	Llmts_fail
1487	test	$(M_ILOCKED_MSK), LMTX_C_REG32
1488	jz	Llmts_try
1489	jmp	Llmts_loop
1490
1491Llmts_busy_disabled:
1492	PREEMPTION_ENABLE
1493	jmp	Llmts_loop
1494
1495
1496
1497NONLEAF_ENTRY(lck_mtx_try_lock)
1498	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1499
1500	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1501	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
1502	jnz	Llmt_slow
1503Llmt_try:					/* no - can't be INDIRECT, DESTROYED or locked */
1504	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1505	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1506
1507	PREEMPTION_DISABLE
1508	lock
1509	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1510	jne	Llmt_busy_disabled
1511
1512 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1513	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1514#if	MACH_LDEBUG
1515	test	LMTX_A_REG, LMTX_A_REG
1516	jz	1f
1517	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
15181:
1519#endif	/* MACH_LDEBUG */
1520
1521	LMTX_CHK_EXTENDED_EXIT
1522
1523	test	$(M_WAITERS_MSK), LMTX_C_REG32
1524	jz	0f
1525
1526	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
15270:
1528	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
1529	PREEMPTION_ENABLE
1530
1531	leave
1532#if	CONFIG_DTRACE
1533	mov	$1, LMTX_RET_REG		/* return success */
1534	/* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
1535	LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point)
1536	ret
1537	/* inherit lock pointer in LMTX_REG from above */
1538	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG)
1539#endif
1540	mov	$1, LMTX_RET_REG		/* return success */
1541	ret
1542
1543Llmt_slow:
1544	test	$(M_ILOCKED_MSK), LMTX_C_REG32	/* is the interlock held */
1545	jz	Llmt_fail			/* no, must be held as a mutex */
1546
	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
1548	je	lck_mtx_destroyed
1549	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1550	jne	Llmt_loop
1551
1552	LMTX_ENTER_EXTENDED
1553Llmt_loop:
1554	PAUSE
1555	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1556Llmt_loop1:
1557	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
1558	jnz	Llmt_fail
1559	test	$(M_ILOCKED_MSK), LMTX_C_REG32
1560	jz	Llmt_try
1561	jmp	Llmt_loop
1562
1563Llmt_busy_disabled:
1564	PREEMPTION_ENABLE
1565	jmp	Llmt_loop
1566
1567
1568Llmt_fail:
1569Llmts_fail:
1570	LMTX_CHK_EXTENDED		/* is this an extended mutex */
1571	je	0f
1572	LMTX_UPDATE_MISS
1573	LMTX_EXIT_EXTENDED
15740:
1575	xor	LMTX_RET_REG, LMTX_RET_REG
1576	NONLEAF_RET
1577
1578
1579
1580NONLEAF_ENTRY(lck_mtx_convert_spin)
1581	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1582
1583	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1584	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1585	jne	0f
1586	mov	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
1587	mov	M_STATE(LMTX_REG), LMTX_C_REG32
15880:
1589	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* already owned as a mutex, just return */
1590	jnz	2f
1591	test	$(M_WAITERS_MSK), LMTX_C_REG32	/* are there any waiters? */
1592	jz	1f
1593
1594	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
1595	mov	M_STATE(LMTX_REG), LMTX_C_REG32
15961:
1597	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG32	/* convert from spin version to mutex */
1598	or	$(M_MLOCKED_MSK), LMTX_C_REG32
1599	mov	LMTX_C_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
1600
1601	PREEMPTION_ENABLE
16022:
1603	NONLEAF_RET
1604
1605
1606
1607NONLEAF_ENTRY(lck_mtx_unlock)
1608	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
1609Llmu_entry:
1610	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1611Llmu_prim:
1612	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1613	je	Llmu_ext
1614
1615Llmu_chktype:
1616	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* check for full mutex */
1617	jz	Llmu_unlock
1618Llmu_mutex:
1619	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
1620	jnz	Llmu_busy
1621
1622	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1623	and	$(~M_MLOCKED_MSK), LMTX_C_REG32	/* drop mutex */
1624	or	$(M_ILOCKED_MSK), LMTX_C_REG32	/* pick up interlock */
1625
1626	PREEMPTION_DISABLE
1627	lock
1628	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1629	jne	Llmu_busy_disabled		/* branch on failure to spin loop */
1630
1631Llmu_unlock:
1632	xor	LMTX_A_REG, LMTX_A_REG
1633	mov	LMTX_A_REG, M_OWNER(LMTX_REG)
1634	mov	LMTX_C_REG, LMTX_A_REG			/* keep original state in %ecx for later evaluation */
1635	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG
1636
1637	test	$(M_WAITERS_MSK), LMTX_A_REG32
1638	jz	2f
1639	dec	LMTX_A_REG32				/* decrement waiter count */
16402:
1641	mov	LMTX_A_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
1642
1643#if	MACH_LDEBUG
1644	/* perform lock statistics after drop to prevent delay */
1645	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1646	test	LMTX_A_REG, LMTX_A_REG
1647	jz	1f
1648	decl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
16491:
1650#endif	/* MACH_LDEBUG */
1651
1652	test	$(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG32
1653	jz	3f
1654
1655	LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG)
16563:
1657	PREEMPTION_ENABLE
1658
1659	LMTX_CHK_EXTENDED
1660	jne	4f
1661
1662	leave
1663#if	CONFIG_DTRACE
1664	/* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */
1665	LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
1666	ret
1667	/* inherit lock pointer in LMTX_REG from above */
1668	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG)
1669#endif
1670	ret
16714:
1672	leave
1673#if	CONFIG_DTRACE
1674	/* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
1675	LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
1676	ret
1677	/* inherit lock pointer in LMTX_REG from above */
1678	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG)
1679#endif
1680	ret
1681
1682
1683Llmu_busy_disabled:
1684	PREEMPTION_ENABLE
1685Llmu_busy:
1686	PAUSE
1687	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1688	jmp	Llmu_mutex
1689
1690Llmu_ext:
1691	mov	M_PTR(LMTX_REG), LMTX_REG
1692	mov	M_OWNER(LMTX_REG), LMTX_A_REG
1693	mov	%gs:CPU_ACTIVE_THREAD, LMTX_C_REG
1694	CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG)
1695	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1696	jmp 	Llmu_chktype
1697
1698
1699
1700LEAF_ENTRY(lck_mtx_ilk_unlock)
1701	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
1702
1703	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
1704
1705	PREEMPTION_ENABLE			/* need to re-enable preemption */
1706
1707	LEAF_RET
1708
1709
1710
1711LEAF_ENTRY(lck_mtx_lock_grab_mutex)
1712	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
1713
1714	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1715
1716	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* can't have the mutex yet */
1717	jnz	3f
1718
1719	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1720	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
1721
1722	PREEMPTION_DISABLE
1723	lock
1724	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1725	jne	2f				/* branch on failure to spin loop */
1726
1727 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
1728	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
1729#if	MACH_LDEBUG
1730	test	LMTX_A_REG, LMTX_A_REG
1731	jz	1f
1732	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
17331:
1734#endif	/* MACH_LDEBUG */
1735
1736	mov	$1, LMTX_RET_REG		/* return success */
1737	LEAF_RET
17382:
1739	PREEMPTION_ENABLE
17403:
1741	xor	LMTX_RET_REG, LMTX_RET_REG	/* return failure */
1742	LEAF_RET
1743
1744
1745
1746LEAF_ENTRY(lck_mtx_lock_mark_destroyed)
1747	LOAD_LMTX_REG(L_ARG0)
17481:
1749	mov	M_STATE(LMTX_REG), LMTX_C_REG32
1750	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
1751	jne	2f
1752
1753	movl	$(MUTEX_DESTROYED), M_STATE(LMTX_REG)	/* convert to destroyed state */
1754	jmp	3f
17552:
1756	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
1757	jnz	5f
1758
1759	PREEMPTION_DISABLE
1760	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
1761	or	$(M_ILOCKED_MSK), LMTX_C_REG32
1762	lock
1763	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
1764	jne	4f				/* branch on failure to spin loop */
1765	movl	$(MUTEX_DESTROYED), M_STATE(LMTX_REG)	/* convert to destroyed state */
1766	PREEMPTION_ENABLE
17673:
1768	LEAF_RET				/* return with M_ILOCKED set */
17694:
1770	PREEMPTION_ENABLE
17715:
1772	PAUSE
1773	jmp	1b
1774
1775LEAF_ENTRY(preemption_underflow_panic)
1776	FRAME
1777	incl	%gs:CPU_PREEMPTION_LEVEL
1778	ALIGN_STACK()
1779	LOAD_STRING_ARG0(16f)
1780	CALL_PANIC()
1781	hlt
1782	.data
178316:	String	"Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
1784	.text
1785
1786
1787LEAF_ENTRY(_disable_preemption)
1788#if	MACH_RT
1789	PREEMPTION_DISABLE
1790#endif	/* MACH_RT */
1791	LEAF_RET
1792
1793LEAF_ENTRY(_enable_preemption)
1794#if	MACH_RT
1795#if	MACH_ASSERT
1796	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
1797	jg	1f
1798	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
1799	ALIGN_STACK()
1800	LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
1801	CALL_PANIC()
1802	hlt
1803	.cstring
1804_enable_preemption_less_than_zero:
1805	.asciz	"_enable_preemption: preemption_level(%d)  < 0!"
1806	.text
18071:
1808#endif	/* MACH_ASSERT */
1809	PREEMPTION_ENABLE
1810#endif	/* MACH_RT */
1811	LEAF_RET
1812
1813LEAF_ENTRY(_enable_preemption_no_check)
1814#if	MACH_RT
1815#if	MACH_ASSERT
1816	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
1817	jg	1f
1818	ALIGN_STACK()
1819	LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
1820	CALL_PANIC()
1821	hlt
1822	.cstring
1823_enable_preemption_no_check_less_than_zero:
1824	.asciz	"_enable_preemption_no_check: preemption_level <= 0!"
1825	.text
18261:
1827#endif	/* MACH_ASSERT */
1828	_ENABLE_PREEMPTION_NO_CHECK
1829#endif	/* MACH_RT */
1830	LEAF_RET
1831
1832
1833LEAF_ENTRY(_mp_disable_preemption)
1834#if	MACH_RT
1835	PREEMPTION_DISABLE
1836#endif	/* MACH_RT */
1837	LEAF_RET
1838
1839LEAF_ENTRY(_mp_enable_preemption)
1840#if	MACH_RT
1841#if	MACH_ASSERT
1842	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
1843	jg	1f
1844	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
	ALIGN_STACK()
1846	LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
1847	CALL_PANIC()
1848	hlt
1849	.cstring
1850_mp_enable_preemption_less_than_zero:
1851	.asciz "_mp_enable_preemption: preemption_level (%d) <= 0!"
1852	.text
18531:
1854#endif	/* MACH_ASSERT */
1855	PREEMPTION_ENABLE
1856#endif	/* MACH_RT */
1857	LEAF_RET
1858
1859LEAF_ENTRY(_mp_enable_preemption_no_check)
1860#if	MACH_RT
1861#if	MACH_ASSERT
1862	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
1863	jg	1f
1864	ALIGN_STACK()
1865	LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
1866	CALL_PANIC()
1867	hlt
1868	.cstring
1869_mp_enable_preemption_no_check_less_than_zero:
1870	.asciz "_mp_enable_preemption_no_check: preemption_level <= 0!"
1871	.text
18721:
1873#endif	/* MACH_ASSERT */
1874	_ENABLE_PREEMPTION_NO_CHECK
1875#endif	/* MACH_RT */
1876	LEAF_RET
1877
1878
1879LEAF_ENTRY(i_bit_set)
1880	lock
1881	bts	%edi,(%rsi)
1882	LEAF_RET
1883
1884LEAF_ENTRY(i_bit_clear)
1885	lock
1886	btr	%edi,(%rsi)
1887	LEAF_RET
1888
1889
1890LEAF_ENTRY(bit_lock)
18911:
1892	lock
1893	bts	%edi,(%rsi)
1894	jb	1b
1895	LEAF_RET
1896
1897
1898LEAF_ENTRY(bit_lock_try)
1899	lock
1900	bts	%edi,(%rsi)
1901	jb	bit_lock_failed
1902	movl	$1, %eax
1903	LEAF_RET
1904bit_lock_failed:
1905	xorl	%eax,%eax
1906	LEAF_RET
1907
1908LEAF_ENTRY(bit_unlock)
1909	lock
1910	btr	%edi,(%rsi)
1911	LEAF_RET
1912
1913
1914/*
1915 * Atomic primitives, prototyped in kern/simple_lock.h
1916 */
1917LEAF_ENTRY(hw_atomic_add)
1918#if	MACH_LDEBUG
1919	test	$3, %rdi
1920	jz	1f
1921	ud2
19221:
1923#endif
1924	movl	%esi, %eax		/* Load addend */
1925	lock 	xaddl %eax, (%rdi)		/* Atomic exchange and add */
1926	addl	%esi, %eax		/* Calculate result */
1927	LEAF_RET
1928
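/*
 * Illustrative note: hw_atomic_add returns the *new* value (xadd leaves the
 * old value in %eax, so the addend is added back before returning).  A
 * rough C equivalent, shown only for comparison:
 *
 *	uint32_t hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
 *	{
 *		return __sync_add_and_fetch(dest, delt);
 *	}
 */
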
1929LEAF_ENTRY(hw_atomic_sub)
1930#if	MACH_LDEBUG
1931	test	$3, %rdi
1932	jz	1f
1933	ud2
19341:
1935#endif
1936	negl	%esi
1937	movl	%esi, %eax
1938	lock	xaddl %eax, (%rdi)		/* Atomic exchange and add */
1939	addl	%esi, %eax		/* Calculate result */
1940	LEAF_RET
1941
1942LEAF_ENTRY(hw_atomic_or)
1943#if	MACH_LDEBUG
1944	test	$3, %rdi
1945	jz	1f
1946	ud2
19471:
1948#endif
1949	movl	(%rdi), %eax
19501:
1951	movl	%esi, %edx		/* Load mask */
1952	orl	%eax, %edx
1953	lock	cmpxchgl %edx, (%rdi)	/* Atomic CAS */
1954	jne	1b
1955	movl	%edx, %eax		/* Result */
1956	LEAF_RET
1957/*
1958 * A variant of hw_atomic_or which doesn't return a value.
1959 * The implementation is thus comparatively more efficient.
1960 */
1961
1962LEAF_ENTRY(hw_atomic_or_noret)
1963#if	MACH_LDEBUG
1964	test	$3, %rdi
1965	jz	1f
1966	ud2
19671:
1968#endif
1969	lock
1970	orl	%esi, (%rdi)		/* Atomic OR */
1971	LEAF_RET
1972
1973
1974LEAF_ENTRY(hw_atomic_and)
1975#if	MACH_LDEBUG
1976	test	$3, %rdi
1977	jz	1f
1978	ud2
19791:
1980#endif
1981	movl	(%rdi), %eax
19821:
1983	movl	%esi, %edx		/* Load mask */
1984	andl	%eax, %edx
1985	lock	cmpxchgl %edx, (%rdi)	/* Atomic CAS */
1986	jne	1b
1987	movl	%edx, %eax		/* Result */
1988	LEAF_RET
1989/*
1990 * A variant of hw_atomic_and which doesn't return a value.
1991 * The implementation is thus comparatively more efficient.
1992 */
1993
1994LEAF_ENTRY(hw_atomic_and_noret)
1995#if	MACH_LDEBUG
1996	test	$3, %rdi
1997	jz	1f
1998	ud2
19991:
2000#endif
	lock	andl	%esi, (%rdi)		/* Atomic AND */
2002	LEAF_RET
2003
2004