/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1989 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */

#include <mach_rt.h>
#include <mach_ldebug.h>
#include <i386/asm.h>
#include <i386/eflags.h>
#include <i386/trap.h>
#include <config_dtrace.h>
#include <i386/mp.h>

#include "assym.s"

#define	PAUSE		rep; nop

#include <i386/pal_lock_asm.h>

#define LEAF_ENTRY(name)	\
	Entry(name)

#define LEAF_ENTRY2(n1,n2)	\
	Entry(n1);		\
	Entry(n2)

#define LEAF_RET		\
	ret

/* Non-leaf routines always have a stack frame: */

#define NONLEAF_ENTRY(name)	\
	Entry(name);		\
	FRAME

#define NONLEAF_ENTRY2(n1,n2)	\
	Entry(n1);		\
	Entry(n2);		\
	FRAME

#define NONLEAF_RET		\
	EMARF;			\
	ret


/* For x86_64, the varargs ABI requires that %al indicate
 * how many SSE registers contain arguments. In our case, 0. */
#define ALIGN_STACK() 		and  $0xFFFFFFFFFFFFFFF0, %rsp ;
#define LOAD_STRING_ARG0(label)	leaq label(%rip), %rdi ;
#define LOAD_ARG1(x)		mov x, %esi ;
#define LOAD_PTR_ARG1(x)	mov x, %rsi ;
#define CALL_PANIC()		xorb %al,%al ; call EXT(panic) ;

#define	CHECK_UNLOCK(current, owner)				\
	cmp	current, owner				;	\
	je	1f					;	\
	ALIGN_STACK()					;	\
	LOAD_STRING_ARG0(2f)				;	\
	CALL_PANIC()					;	\
	hlt						;	\
	.data						;	\
2:	String	"Mutex unlock attempted from non-owner thread";	\
	.text						;	\
1:

#if	MACH_LDEBUG
/*
 *  Routines for general lock debugging.
 */

/*
 * Checks for expected lock types and calls "panic" on
 * mismatch.  Detects calls to mutex functions on locks of
 * type simplelock and vice versa.
 */
#define	CHECK_MUTEX_TYPE()					\
	cmpl	$ MUTEX_TAG,M_TYPE			;	\
	je	1f					;	\
	ALIGN_STACK()					;	\
	LOAD_STRING_ARG0(2f)				;	\
	CALL_PANIC()					;	\
	hlt						;	\
	.data						;	\
2:	String	"not a mutex!"				;	\
	.text						;	\
1:

/*
 * If one or more simplelocks are currently held by a thread,
 * an attempt to acquire a mutex will cause this check to fail
 * (since acquiring a mutex may context switch, holding a
 * simplelock while doing so is not a good thing).
 */
#if	MACH_RT
#define CHECK_PREEMPTION_LEVEL()				\
	cmpl	$0,%gs:CPU_HIBERNATE			;	\
	jne	1f					;	\
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		;	\
	je	1f					;	\
	ALIGN_STACK()					;	\
	movl	%gs:CPU_PREEMPTION_LEVEL, %eax		;	\
	LOAD_ARG1(%eax)					;	\
	LOAD_STRING_ARG0(2f)				;	\
	CALL_PANIC()					;	\
	hlt						;	\
	.data						;	\
2:	String	"preemption_level(%d) != 0!"		;	\
	.text						;	\
1:
#else	/* MACH_RT */
#define	CHECK_PREEMPTION_LEVEL()
#endif	/* MACH_RT */

#define	CHECK_MYLOCK(current, owner)				\
	cmp	current, owner				;	\
	jne	1f					;	\
	ALIGN_STACK()					;	\
	LOAD_STRING_ARG0(2f)				;	\
	CALL_PANIC()					;	\
	hlt						;	\
	.data						;	\
2:	String	"Attempt to recursively lock a non-recursive lock";	\
	.text						;	\
1:

#else	/* MACH_LDEBUG */
#define	CHECK_MUTEX_TYPE()
#define CHECK_PREEMPTION_LEVEL()
#define	CHECK_MYLOCK(current, owner)
#endif	/* MACH_LDEBUG */

#define PREEMPTION_DISABLE				\
	incl	%gs:CPU_PREEMPTION_LEVEL

#define	PREEMPTION_LEVEL_DEBUG 1
#if	PREEMPTION_LEVEL_DEBUG
#define	PREEMPTION_ENABLE				\
	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
	js	17f				;	\
	jnz	19f				;	\
	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
	jz	19f				;	\
	PUSHF					;	\
	testl	$EFL_IF, S_PC			;	\
	jz	18f				;	\
	POPF					;	\
	int	$(T_PREEMPT)			;	\
	jmp	19f				;	\
17:							\
	call	_preemption_underflow_panic	;	\
18:							\
	POPF					;	\
19:
#else
#define	PREEMPTION_ENABLE				\
	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
	jnz	19f				;	\
	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
	jz	19f				;	\
	PUSHF					;	\
	testl	$EFL_IF, S_PC			;	\
	jz	18f				;	\
	POPF					;	\
	int	$(T_PREEMPT)			;	\
	jmp	19f				;	\
18:							\
	POPF					;	\
19:
#endif
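
/*
 * In rough C terms, PREEMPTION_ENABLE above behaves as follows
 * (an illustrative sketch only; the helper names are placeholders,
 * not kernel interfaces):
 *
 *	if (--cpu->preemption_level < 0)
 *		preemption_underflow_panic();	// debug variant only
 *	if (cpu->preemption_level == 0 &&
 *	    (cpu->pending_ast & AST_URGENT) &&
 *	    interrupts_enabled())
 *		int $T_PREEMPT;			// take the trap now
 */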


#if	CONFIG_DTRACE

       .globl  _lockstat_probe
       .globl  _lockstat_probemap

/*
 * LOCKSTAT_LABEL creates a dtrace symbol which contains
 * a pointer into the lock code function body. At that
 * point is a "ret" instruction that can be patched into
 * a "nop".
 */

#define        LOCKSTAT_LABEL(lab) \
       .data                                       ;\
       .globl  lab                                 ;\
       lab:                                        ;\
       .quad 9f                                    ;\
       .text                                       ;\
       9:

#define LOCKSTAT_RECORD(id, lck) \
       push    %rbp                                ;       \
       mov     %rsp,%rbp                           ;       \
       movl    _lockstat_probemap + (id * 4)(%rip),%eax ;  \
       test    %eax,%eax                           ;       \
       je              9f                          ;       \
       mov             lck, %rsi                   ;       \
       mov             %rax, %rdi                  ;       \
       mov             $0, %rdx                    ;       \
       mov             $0, %rcx                    ;       \
       mov             $0, %r8                     ;       \
       mov             $0, %r9                     ;       \
       call    *_lockstat_probe(%rip)              ;       \
9:	leave
	/* ret - left to subsequent code, e.g. return values */

#endif /* CONFIG_DTRACE */

/*
 * For most routines, the hw_lock_t pointer is loaded into a
 * register initially, and then either a byte or register-sized
 * word is loaded/stored to the pointer.
 */

/*
 *	void hw_lock_init(hw_lock_t)
 *
 *	Initialize a hardware lock.
 */
LEAF_ENTRY(hw_lock_init)
	movq	$0, (%rdi)		/* clear the lock */
	LEAF_RET


/*
 *	void hw_lock_byte_init(volatile uint8_t *)
 *
 *	Initialize a hardware byte lock.
 */
LEAF_ENTRY(hw_lock_byte_init)
	movb	$0, (%rdi)		/* clear the lock */
	LEAF_RET

/*
 *	void hw_lock_lock(hw_lock_t)
 *
 *	Acquire lock, spinning until it becomes available.
 *	MACH_RT:  also return with preemption disabled.
 */
LEAF_ENTRY(hw_lock_lock)
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* get thread pointer */

	PREEMPTION_DISABLE
1:
	mov	(%rdi), %rax
	test	%rax,%rax		/* lock locked? */
	jne	3f			/* branch if so */
	lock; cmpxchg %rcx,(%rdi)	/* try to acquire the HW lock */
	jne	3f
	movl	$1,%eax			/* In case this was a timeout call */
	LEAF_RET			/* if yes, then nothing left to do */
3:
	PAUSE				/* pause for hyper-threading */
	jmp	1b			/* try again */
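
/*
 * A rough C equivalent of the spin loop above (illustrative sketch;
 * the field and helper names are placeholders, not exact kernel APIs):
 *
 *	void hw_lock_lock(hw_lock_t lock) {
 *		uintptr_t self = (uintptr_t)current_thread();
 *		disable_preemption();
 *		for (;;) {
 *			if (lock->lock_data == 0 &&
 *			    __sync_bool_compare_and_swap(&lock->lock_data,
 *							 0, self))
 *				return;	 // owned; preemption stays disabled
 *			cpu_pause();	 // the PAUSE macro
 *		}
 *	}
 */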

/*
 *	void	hw_lock_byte_lock(uint8_t *lock_byte)
 *
 *	Acquire byte sized lock operand, spinning until it becomes available.
 *	MACH_RT:  also return with preemption disabled.
 */

LEAF_ENTRY(hw_lock_byte_lock)
	PREEMPTION_DISABLE
	movl	$1, %ecx		/* Set lock value */
1:
	movb	(%rdi), %al		/* Load byte at address */
	testb	%al,%al			/* lock locked? */
	jne	3f			/* branch if so */
	lock; cmpxchg %cl,(%rdi)	/* attempt atomic compare exchange */
	jne	3f
	LEAF_RET			/* if yes, then nothing left to do */
3:
	PAUSE				/* pause for hyper-threading */
	jmp	1b			/* try again */

/*
 *	unsigned int hw_lock_to(hw_lock_t, unsigned int)
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	MACH_RT:  also return with preemption disabled.
 */
LEAF_ENTRY(hw_lock_to)
1:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx

	/*
	 * Attempt to grab the lock immediately
	 * - fastpath without timeout nonsense.
	 */
	PREEMPTION_DISABLE

	mov	(%rdi), %rax
	test	%rax,%rax		/* lock locked? */
	jne	2f			/* branch if so */
	lock; cmpxchg %rcx,(%rdi)	/* try to acquire the HW lock */
	jne	2f			/* branch on failure */
	movl	$1,%eax
	LEAF_RET

2:
#define	INNER_LOOP_COUNT	1000
	/*
	 * Failed to get the lock so set the timeout
	 * and then spin re-checking the lock but pausing
	 * every so many (INNER_LOOP_COUNT) spins to check for timeout.
	 */
	push	%r9
	lfence
	rdtsc				/* read cyclecount into %edx:%eax */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* load 64-bit quantity into %rax */
	addq	%rax, %rsi		/* %rsi is the timeout expiry */

4:
	/*
	 * The inner-loop spin to look for the lock being freed.
	 */
	mov	$(INNER_LOOP_COUNT),%r9
5:
	PAUSE				/* pause for hyper-threading */
	mov	(%rdi),%rax		/* spin checking lock value in cache */
	test	%rax,%rax
	je	6f			/* zero => unlocked, try to grab it */
	decq	%r9			/* decrement inner loop count */
	jnz	5b			/* time to check for timeout? */

	/*
	 * Here after spinning INNER_LOOP_COUNT times, check for timeout
	 */
	lfence
	rdtsc				/* cyclecount into %edx:%eax */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* load 64-bit quantity into %rax */
	cmpq	%rsi, %rax		/* compare to timeout */
	jb	4b			/* continue spinning if not timed out */
	xor	%rax,%rax		/* timed out: return 0 */
	pop	%r9
	LEAF_RET

6:
	/*
	 * Here to try to grab the lock that now appears to be free
	 * after contention.
	 */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	lock; cmpxchg %rcx,(%rdi)	/* try to acquire the HW lock */
	jne	4b			/* no - spin again */
	movl	$1,%eax			/* yes */
	pop	%r9
	LEAF_RET
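
/*
 * The timeout path above, roughly, in C (illustrative sketch; rdtsc64()
 * stands in for the lfence/rdtsc/shlq/orq sequence and try_cas() for
 * lock; cmpxchg):
 *
 *	uint64_t deadline = rdtsc64() + timeout;
 *	do {
 *		for (int i = 0; i < INNER_LOOP_COUNT; i++) {
 *			cpu_pause();
 *			if (lock->lock_data == 0 &&
 *			    try_cas(&lock->lock_data, 0, current_thread()))
 *				return 1;		// acquired
 *		}
 *	} while (rdtsc64() < deadline);
 *	return 0;				// timed out
 */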

/*
 *	void hw_lock_unlock(hw_lock_t)
 *
 *	Unconditionally release lock.
 *	MACH_RT:  release preemption level.
 */
LEAF_ENTRY(hw_lock_unlock)
	movq $0, (%rdi)		/* clear the lock */
	PREEMPTION_ENABLE
	LEAF_RET

/*
 *	void hw_lock_byte_unlock(uint8_t *lock_byte)
 *
 *	Unconditionally release byte sized lock operand.
 *	MACH_RT:  release preemption level.
 */

LEAF_ENTRY(hw_lock_byte_unlock)
	movb $0, (%rdi)		/* Clear the lock byte */
	PREEMPTION_ENABLE
	LEAF_RET

/*
 *	unsigned int hw_lock_try(hw_lock_t)
 *	MACH_RT:  returns with preemption disabled on success.
 */
LEAF_ENTRY(hw_lock_try)
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	PREEMPTION_DISABLE

	mov	(%rdi),%rax
	test	%rax,%rax
	jne	1f
	lock; cmpxchg %rcx,(%rdi)	/* try to acquire the HW lock */
	jne	1f

	movl	$1,%eax			/* success */
	LEAF_RET

1:
	PREEMPTION_ENABLE		/* failure:  release preemption... */
	xorl	%eax,%eax		/* ...and return failure */
	LEAF_RET

/*
 *	unsigned int hw_lock_held(hw_lock_t)
 *	MACH_RT:  doesn't change preemption state.
 *	N.B.  Racy, of course.
 */
LEAF_ENTRY(hw_lock_held)
	mov	(%rdi),%rax		/* check lock value */
	test	%rax,%rax
	movl	$1,%ecx
	cmovne	%ecx,%eax		/* 0 => unlocked, 1 => locked */
	LEAF_RET


/*
 * Reader-writer lock fastpaths. These currently exist for the
 * shared lock acquire, the exclusive lock acquire, the shared to
 * exclusive upgrade and the release paths (where they reduce overhead
 * considerably) -- these are by far the most frequently used routines.
 *
 * The following should reflect the layout of the bitfield embedded within
 * the lck_rw_t structure (see i386/locks.h).
 */
#define LCK_RW_INTERLOCK	(0x1 << 16)

#define LCK_RW_PRIV_EXCL	(0x1 << 24)
#define LCK_RW_WANT_UPGRADE	(0x2 << 24)
#define LCK_RW_WANT_WRITE	(0x4 << 24)
#define LCK_R_WAITING		(0x8 << 24)
#define LCK_W_WAITING		(0x10 << 24)

#define LCK_RW_SHARED_MASK	(0xffff)
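
/*
 * Taken together, the masks above imply this layout for the 32-bit
 * state word (assuming it matches the bitfield in i386/locks.h):
 *
 *	bits  0-15	shared (reader) count
 *	bit  16		interlock
 *	bit  24		priv_excl	bit 25	want_upgrade
 *	bit  26		want_write	bit 27	r_waiting
 *	bit  28		w_waiting
 */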

/*
 * For most routines, the lck_rw_t pointer is loaded into a
 * register initially, and the flags bitfield loaded into another
 * register and examined.
 */

#define	RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
/*
 *	void lck_rw_lock_shared(lck_rw_t *)
 *
 */
Entry(lck_rw_lock_shared)
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count before atomic CAS */
1:
	mov	(%rdi), %eax		/* Load state bitfield and interlock */
	testl	$(RW_LOCK_SHARED_MASK), %eax	/* Eligible for fastpath? */
	jne	3f

	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
	incl	%ecx				/* Increment reader refcount */
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	2f

#if	CONFIG_DTRACE
	/*
	 * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_ACQUIRE
	 * Implemented by swapping between return and no-op instructions.
	 * See bsd/dev/dtrace/lockstat.c.
	 */
	LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
	ret
	/*
	Fall thru when patched, counting on lock pointer in %rdi
	*/
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %rdi)
#endif
	ret
2:
	PAUSE
	jmp	1b
3:
	jmp	EXT(lck_rw_lock_shared_gen)
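
/*
 * The fastpath above is, in effect (illustrative C sketch; cas32()
 * stands in for the lock; cmpxchgl sequence):
 *
 *	current_thread()->rwlock_count++;
 *	for (;;) {
 *		uint32_t old = lck->data;
 *		if (old & (INTERLOCK | WANT_UPGRADE | WANT_WRITE)) {
 *			lck_rw_lock_shared_gen(lck);	// slow path
 *			return;
 *		}
 *		if (cas32(&lck->data, old, old + 1))	// one more reader
 *			return;
 *		cpu_pause();
 *	}
 */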



#define	RW_TRY_LOCK_SHARED_MASK (LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
/*
 *	void lck_rw_try_lock_shared(lck_rw_t *)
 *
 */
Entry(lck_rw_try_lock_shared)
1:
	mov	(%rdi), %eax		/* Load state bitfield and interlock */
	testl	$(LCK_RW_INTERLOCK), %eax
	jne	2f
	testl	$(RW_TRY_LOCK_SHARED_MASK), %eax
	jne	3f			/* lock is busy */

	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
	incl	%ecx				/* Increment reader refcount */
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	2f

	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count on success. */
	/* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */

#if	CONFIG_DTRACE
	movl	$1, %eax
	/*
	 * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
	 * Implemented by swapping between return and no-op instructions.
	 * See bsd/dev/dtrace/lockstat.c.
	 */
	LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
	ret
	/* Fall thru when patched, counting on lock pointer in %rdi  */
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, %rdi)
#endif
	movl	$1, %eax			/* return TRUE */
	ret
2:
	PAUSE
	jmp	1b
3:
	xorl	%eax, %eax
	ret


#define	RW_LOCK_EXCLUSIVE_HELD	(LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)
/*
 *	int lck_rw_grab_shared(lck_rw_t *)
 *
 */
Entry(lck_rw_grab_shared)
1:
	mov	(%rdi), %eax		/* Load state bitfield and interlock */
	testl	$(LCK_RW_INTERLOCK), %eax
	jne	5f
	testl	$(RW_LOCK_EXCLUSIVE_HELD), %eax
	jne	3f
2:
	movl	%eax, %ecx		/* original value in %eax for cmpxchgl */
	incl	%ecx			/* Increment reader refcount */
	lock
	cmpxchgl %ecx, (%rdi)		/* Attempt atomic exchange */
	jne	4f

	movl	$1, %eax		/* return success */
	ret
3:
	testl	$(LCK_RW_SHARED_MASK), %eax
	je	4f
	testl	$(LCK_RW_PRIV_EXCL), %eax
	je	2b
4:
	xorl	%eax, %eax		/* return failure */
	ret
5:
	PAUSE
	jmp	1b



#define	RW_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | \
	                        LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
/*
 *	void lck_rw_lock_exclusive(lck_rw_t*)
 *
 */
Entry(lck_rw_lock_exclusive)
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count before atomic CAS */
1:
	mov	(%rdi), %eax		/* Load state bitfield, interlock and shared count */
	testl	$(RW_LOCK_EXCLUSIVE_MASK), %eax		/* Eligible for fastpath? */
	jne	3f					/* no, go slow */

	movl	%eax, %ecx				/* original value in %eax for cmpxchgl */
	orl	$(LCK_RW_WANT_WRITE), %ecx
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	2f

#if	CONFIG_DTRACE
	/*
	 * Dtrace lockstat event: LS_LCK_RW_LOCK_EXCL_ACQUIRE
	 * Implemented by swapping between return and no-op instructions.
	 * See bsd/dev/dtrace/lockstat.c.
	 */
	LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
	ret
	/* Fall thru when patched, counting on lock pointer in %rdi  */
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, %rdi)
#endif
	ret
2:
	PAUSE
	jmp	1b
3:
	jmp	EXT(lck_rw_lock_exclusive_gen)



#define	RW_TRY_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
/*
 *	void lck_rw_try_lock_exclusive(lck_rw_t *)
 *
 *		Tries to get a write lock.
 *
 *		Returns FALSE if the lock is not held on return.
 */
Entry(lck_rw_try_lock_exclusive)
1:
	mov	(%rdi), %eax		/* Load state bitfield, interlock and shared count */
	testl	$(LCK_RW_INTERLOCK), %eax
	jne	2f
	testl	$(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax
	jne	3f				/* can't get it */

	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
	orl	$(LCK_RW_WANT_WRITE), %ecx
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	2f

	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Load thread pointer */
	incl	TH_RWLOCK_COUNT(%rcx)		/* Increment count on success. */
	/* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */

#if	CONFIG_DTRACE
	movl	$1, %eax
	/*
	 * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
	 * Implemented by swapping between return and no-op instructions.
	 * See bsd/dev/dtrace/lockstat.c.
	 */
	LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
	ret
	/* Fall thru when patched, counting on lock pointer in %rdi  */
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, %rdi)
#endif
	movl	$1, %eax			/* return TRUE */
	ret
2:
	PAUSE
	jmp	1b
3:
	xorl	%eax, %eax			/* return FALSE */
	ret



/*
 *	void lck_rw_lock_shared_to_exclusive(lck_rw_t*)
 *
 *	fastpath can be taken if
 *	the current rw_shared_count == 1
 *	AND the interlock is clear
 *	AND RW_WANT_UPGRADE is not set
 *
 *	note that RW_WANT_WRITE could be set, but will not
 *	be indicative of an exclusive hold since we have
 *	a read count on the lock that we have not yet released.
 *	we can blow by that state since the lck_rw_lock_exclusive
 *	function will block until rw_shared_count == 0 and
 *	RW_WANT_UPGRADE is clear... it does this check behind
 *	the interlock which we are also checking for
 *
 *	to make the transition we must be able to atomically
 *	set RW_WANT_UPGRADE and get rid of the read count we hold
 */
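/*
 * In C terms, the atomic transition attempted below is (sketch only;
 * cas32() is a placeholder):
 *
 *	uint32_t old = lck->data;
 *	if (!(old & (INTERLOCK | WANT_UPGRADE)) &&
 *	    cas32(&lck->data, old, (old | WANT_UPGRADE) - 1)) {
 *		// we own WANT_UPGRADE and have shed our read count;
 *		// if other readers remain we must still wait for them
 *	}
 */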
Entry(lck_rw_lock_shared_to_exclusive)
1:
	mov	(%rdi), %eax		/* Load state bitfield, interlock and shared count */
	testl	$(LCK_RW_INTERLOCK), %eax
	jne	7f
	testl	$(LCK_RW_WANT_UPGRADE), %eax
	jne	2f

	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
	orl	$(LCK_RW_WANT_UPGRADE), %ecx	/* ask for WANT_UPGRADE */
	decl	%ecx				/* and shed our read count */
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	7f
						/* we now own the WANT_UPGRADE */
	testl	$(LCK_RW_SHARED_MASK), %ecx	/* check to see if all of the readers are drained */
	jne	8f				/* if not, we need to go wait */

#if	CONFIG_DTRACE
	movl	$1, %eax
	/*
	 * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
	 * Implemented by swapping between return and no-op instructions.
	 * See bsd/dev/dtrace/lockstat.c.
	 */
	LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
	ret
	/* Fall thru when patched, counting on lock pointer in %rdi  */
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, %rdi)
#endif
	movl	$1, %eax			/* return success */
	ret

2:						/* someone else already holds WANT_UPGRADE */
	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
	decl	%ecx				/* shed our read count */
	testl	$(LCK_RW_SHARED_MASK), %ecx
	jne	3f				/* still other readers, leave the wait indicator */
	andl	$(~LCK_W_WAITING), %ecx		/* we were the last reader, so clear the wait indicator */
3:
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	7f

	mov	%eax, %esi			/* put old flags as second arg */
						/* lock is already in %rdi */
	call	EXT(lck_rw_lock_shared_to_exclusive_failure)
	ret					/* and pass the failure return along */
7:
	PAUSE
	jmp	1b
8:
	jmp	EXT(lck_rw_lock_shared_to_exclusive_success)



	.cstring
rwl_release_error_str:
	.asciz  "Releasing non-exclusive RW lock without a reader refcount!"
	.text

/*
 *	lck_rw_type_t lck_rw_done(lck_rw_t *)
 *
 */
Entry(lck_rw_done)
1:
	mov	(%rdi), %eax		/* Load state bitfield, interlock and reader count */
	testl   $(LCK_RW_INTERLOCK), %eax
	jne     7f				/* wait for interlock to clear */

	movl	%eax, %ecx			/* keep original value in %eax for cmpxchgl */
	testl	$(LCK_RW_SHARED_MASK), %ecx	/* if reader count == 0, must be exclusive lock */
	je	2f
	decl	%ecx				/* Decrement reader count */
	testl	$(LCK_RW_SHARED_MASK), %ecx	/* if reader count has now gone to 0, check for waiters */
	je	4f
	jmp	6f
2:
	testl	$(LCK_RW_WANT_UPGRADE), %ecx
	je	3f
	andl	$(~LCK_RW_WANT_UPGRADE), %ecx
	jmp	4f
3:
	testl	$(LCK_RW_WANT_WRITE), %ecx
	je	8f				/* lock is not 'owned', go panic */
	andl	$(~LCK_RW_WANT_WRITE), %ecx
4:
	/*
	 * test the original values to match what
	 * lck_rw_done_gen is going to do to determine
	 * which wakeups need to happen...
	 *
	 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
	 */
	testl	$(LCK_W_WAITING), %eax
	je	5f
	andl	$(~LCK_W_WAITING), %ecx

	testl	$(LCK_RW_PRIV_EXCL), %eax
	jne	6f
5:
	andl	$(~LCK_R_WAITING), %ecx
6:
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	7f

	mov	%eax,%esi	/* old flags in %rsi */
				/* lock is in %rdi already */
	call	EXT(lck_rw_done_gen)
	ret
7:
	PAUSE
	jmp	1b
8:
	ALIGN_STACK()
	LOAD_STRING_ARG0(rwl_release_error_str)
	CALL_PANIC()



/*
 *	lck_rw_type_t lck_rw_lock_exclusive_to_shared(lck_rw_t *)
 *
 */
Entry(lck_rw_lock_exclusive_to_shared)
1:
	mov	(%rdi), %eax		/* Load state bitfield, interlock and reader count */
	testl   $(LCK_RW_INTERLOCK), %eax
	jne     6f				/* wait for interlock to clear */

	movl	%eax, %ecx			/* keep original value in %eax for cmpxchgl */
	incl	%ecx				/* Increment reader count */

	testl	$(LCK_RW_WANT_UPGRADE), %ecx
	je	2f
	andl	$(~LCK_RW_WANT_UPGRADE), %ecx
	jmp	3f
2:
	andl	$(~LCK_RW_WANT_WRITE), %ecx
3:
	/*
	 * test the original values to match what
	 * lck_rw_lock_exclusive_to_shared_gen is going to do to determine
	 * which wakeups need to happen...
	 *
	 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
	 */
	testl	$(LCK_W_WAITING), %eax
	je	4f
	testl	$(LCK_RW_PRIV_EXCL), %eax
	jne	5f
4:
	andl	$(~LCK_R_WAITING), %ecx
5:
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	6f

	mov	%eax,%esi
	call	EXT(lck_rw_lock_exclusive_to_shared_gen)
	ret
6:
	PAUSE
	jmp	1b



/*
 *	int lck_rw_grab_want(lck_rw_t *)
 *
 */
Entry(lck_rw_grab_want)
1:
	mov	(%rdi), %eax		/* Load state bitfield, interlock and reader count */
	testl   $(LCK_RW_INTERLOCK), %eax
	jne     3f				/* wait for interlock to clear */
	testl	$(LCK_RW_WANT_WRITE), %eax	/* want_write has been grabbed by someone else */
	jne	2f				/* go return failure */

	movl	%eax, %ecx			/* original value in %eax for cmpxchgl */
	orl	$(LCK_RW_WANT_WRITE), %ecx
	lock
	cmpxchgl %ecx, (%rdi)			/* Attempt atomic exchange */
	jne	2f
						/* we now own want_write */
	movl	$1, %eax			/* return success */
	ret
2:
	xorl	%eax, %eax			/* return failure */
	ret
3:
	PAUSE
	jmp	1b


#define	RW_LOCK_SHARED_OR_UPGRADE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)
/*
 *	int lck_rw_held_read_or_upgrade(lck_rw_t *)
 *
 */
Entry(lck_rw_held_read_or_upgrade)
	mov	(%rdi), %eax
	andl	$(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax
	ret



/*
 * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
 * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
 * as a 64-bit quantity (this matches the existing PowerPC implementation;
 * the new x86-specific statistics are maintained as 32-bit quantities).
 *
 *
 * Enable this preprocessor define to record the first miss alone.
 * By default, we count every miss, hence multiple misses may be
 * recorded for a single lock acquire attempt via lck_mtx_lock.
 */
#undef LOG_FIRST_MISS_ALONE

/*
 * This preprocessor define controls whether the R-M-W updates of the
 * per-group statistics elements are atomic (LOCK-prefixed).
 * Enabled by default.
 */
#define ATOMIC_STAT_UPDATES 1

#if defined(ATOMIC_STAT_UPDATES)
#define LOCK_IF_ATOMIC_STAT_UPDATES lock
#else
#define LOCK_IF_ATOMIC_STAT_UPDATES
#endif /* ATOMIC_STAT_UPDATES */


/*
 * For most routines, the lck_mtx_t pointer is loaded into a
 * register initially, and the owner field checked for indirection.
 * Eventually the lock owner is loaded into a register and examined.
 */

#define M_OWNER		MUTEX_OWNER
#define M_PTR		MUTEX_PTR
#define M_STATE		MUTEX_STATE


#define LMTX_ENTER_EXTENDED					\
	mov	M_PTR(%rdx), %rdx 			;	\
	xor	%r11, %r11				;	\
	mov	MUTEX_GRP(%rdx), %r10			;	\
	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
	incq	GRP_MTX_STAT_UTIL(%r10)


#if	LOG_FIRST_MISS_ALONE
#define LMTX_UPDATE_MISS					\
	test	$1, %r11 				;	\
	jnz	11f					;	\
	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
	incl	GRP_MTX_STAT_MISS(%r10)			;	\
	or	$1, %r11				;	\
11:
#else
#define LMTX_UPDATE_MISS					\
	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
	incl	GRP_MTX_STAT_MISS(%r10)
#endif


#if	LOG_FIRST_MISS_ALONE
#define LMTX_UPDATE_WAIT					\
	test	$2, %r11 				;	\
	jnz	11f					;	\
	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
	incl	GRP_MTX_STAT_WAIT(%r10)			;	\
	or	$2, %r11				;	\
11:
#else
#define LMTX_UPDATE_WAIT					\
	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
	incl	GRP_MTX_STAT_WAIT(%r10)
#endif


/*
 * Record the "direct wait" statistic, which indicates if a
 * miss proceeded to block directly without spinning--occurs
 * if the owner of the mutex isn't running on another processor
 * at the time of the check.
 */
#define LMTX_UPDATE_DIRECT_WAIT					\
	LOCK_IF_ATOMIC_STAT_UPDATES 			;	\
	incl	GRP_MTX_STAT_DIRECT_WAIT(%r10)


#define LMTX_CALLEXT1(func_name)		\
	cmp	%rdx, %rdi		;	\
	je	12f			;	\
	push	%r10			;	\
	push	%r11			;	\
12:	push	%rdi			;	\
	push	%rdx			;	\
	mov	%rdx, %rdi		;	\
	call	EXT(func_name)		;	\
	pop	%rdx			;	\
	pop	%rdi			;	\
	cmp	%rdx, %rdi		;	\
	je	12f			;	\
	pop	%r11			;	\
	pop	%r10			;	\
12:

#define LMTX_CALLEXT2(func_name, reg)		\
	cmp	%rdx, %rdi		;	\
	je	12f			;	\
	push	%r10			;	\
	push	%r11			;	\
12:	push	%rdi			;	\
	push	%rdx			;	\
	mov	reg, %rsi		;	\
	mov	%rdx, %rdi		;	\
	call	EXT(func_name)		;	\
	pop	%rdx			;	\
	pop	%rdi			;	\
	cmp	%rdx, %rdi		;	\
	je	12f			;	\
	pop	%r11			;	\
	pop	%r10			;	\
12:


#define M_WAITERS_MSK		0x0000ffff
#define M_PRIORITY_MSK		0x00ff0000
#define M_ILOCKED_MSK		0x01000000
#define M_MLOCKED_MSK		0x02000000
#define M_PROMOTED_MSK		0x04000000
#define M_SPIN_MSK		0x08000000
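
/*
 * The masks above imply this layout for the 32-bit mutex state word:
 *
 *	bits  0-15	waiter count
 *	bits 16-23	priority
 *	bit  24		interlocked	bit 25	mutex locked
 *	bit  26		promoted	bit 27	held as spinlock
 */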

/*
 *	void lck_mtx_assert(lck_mtx_t* l, unsigned int)
 *	Takes the address of a lock, and an assertion type as parameters.
 *	The assertion can take one of two forms, determined by the type
 *	parameter: either the lock is held by the current thread, and the
 *	type is	LCK_MTX_ASSERT_OWNED, or it isn't and the type is
 *	LCK_MTX_ASSERT_NOTOWNED. Calls panic on assertion failure.
 *
 */

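/*
 * Typical call, in C (illustrative only; my_lock is a hypothetical lock):
 *
 *	lck_mtx_assert(&my_lock, LCK_MTX_ASSERT_OWNED);
 */
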
NONLEAF_ENTRY(lck_mtx_assert)
	mov	%rdi, %rdx			/* Load lock address */
	mov	%gs:CPU_ACTIVE_THREAD, %rax	/* Load current thread */

	mov	M_STATE(%rdx), %ecx
	cmp	$(MUTEX_IND), %ecx		/* Is this an indirect mutex? */
	jne	0f
	mov	M_PTR(%rdx), %rdx		/* If so, take indirection */
0:
	mov	M_OWNER(%rdx), %rcx		/* Load owner */
	cmp	$(MUTEX_ASSERT_OWNED), %rsi
	jne	2f				/* Assert ownership? */
	cmp	%rax, %rcx			/* Current thread match? */
	jne	3f				/* no, go panic */
	testl	$(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(%rdx)
	je	3f
1:						/* yes, we own it */
	NONLEAF_RET
2:
	cmp	%rax, %rcx			/* Current thread match? */
	jne	1b				/* No, return */
	ALIGN_STACK()
	LOAD_PTR_ARG1(%rdx)
	LOAD_STRING_ARG0(mutex_assert_owned_str)
	jmp	4f
3:
	ALIGN_STACK()
	LOAD_PTR_ARG1(%rdx)
	LOAD_STRING_ARG0(mutex_assert_not_owned_str)
4:
	CALL_PANIC()


lck_mtx_destroyed:
	ALIGN_STACK()
	LOAD_PTR_ARG1(%rdx)
	LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
	CALL_PANIC()


.data
mutex_assert_not_owned_str:
	.asciz	"mutex (%p) not owned\n"
mutex_assert_owned_str:
	.asciz	"mutex (%p) owned\n"
mutex_interlock_destroyed_str:
	.asciz	"trying to interlock destroyed mutex (%p)"
.text



/*
 * lck_mtx_lock()
 * lck_mtx_try_lock()
 * lck_mtx_unlock()
 * lck_mtx_lock_spin()
 * lck_mtx_lock_spin_always()
 * lck_mtx_try_lock_spin()
 * lck_mtx_try_lock_spin_always()
 * lck_mtx_convert_spin()
 */
NONLEAF_ENTRY(lck_mtx_lock_spin_always)
	mov	%rdi, %rdx		/* fetch lock pointer */
	jmp	Llmls_avoid_check

NONLEAF_ENTRY(lck_mtx_lock_spin)
	mov	%rdi, %rdx		/* fetch lock pointer */

	CHECK_PREEMPTION_LEVEL()
Llmls_avoid_check:
	mov	M_STATE(%rdx), %ecx
	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx	/* is the interlock or mutex held */
	jnz	Llmls_slow
Llmls_try:				/* no - can't be INDIRECT, DESTROYED or locked */
	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK | M_SPIN_MSK), %ecx

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	Llmls_busy_disabled

	mov	%gs:CPU_ACTIVE_THREAD, %rax
	mov	%rax, M_OWNER(%rdx)	/* record owner of interlock */
#if	MACH_LDEBUG
	test	%rax, %rax
	jz	1f
	incl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	/* return with the interlock held and preemption disabled */
	leave
#if	CONFIG_DTRACE
	LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx above */
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, %rdx)
#endif
	ret

Llmls_slow:
	test	$(M_ILOCKED_MSK), %ecx		/* is the interlock held */
	jz	Llml_contended			/* no, must have been the mutex */

	cmp	$(MUTEX_DESTROYED), %ecx	/* check to see if it's marked destroyed */
	je	lck_mtx_destroyed
	cmp	$(MUTEX_IND), %ecx		/* Is this an indirect mutex */
	jne	Llmls_loop			/* no... must be interlocked */

	LMTX_ENTER_EXTENDED

	mov	M_STATE(%rdx), %ecx
	test	$(M_SPIN_MSK), %ecx
	jz	Llmls_loop1

	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
Llmls_loop:
	PAUSE
	mov	M_STATE(%rdx), %ecx
Llmls_loop1:
	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx
	jz	Llmls_try
	test	$(M_MLOCKED_MSK), %ecx
	jnz	Llml_contended			/* mutex owned by someone else, go contend for it */
	jmp	Llmls_loop

Llmls_busy_disabled:
	PREEMPTION_ENABLE
	jmp	Llmls_loop
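
/*
 * The spin-variant fastpath above has this rough C shape (illustrative
 * sketch; cas32() is a placeholder -- lck_mtx_lock proper sets
 * M_MLOCKED_MSK instead of M_SPIN_MSK):
 *
 *	uint32_t old = m->state;
 *	if (!(old & (M_ILOCKED_MSK | M_MLOCKED_MSK))) {
 *		disable_preemption();
 *		if (cas32(&m->state, old, old | M_ILOCKED_MSK | M_SPIN_MSK)) {
 *			m->owner = current_thread();
 *			return;		// interlock held, preemption off
 *		}
 *		enable_preemption();	// lost the race, spin and retry
 *	}
 *	// ...otherwise: indirect/destroyed checks, then spin and retry
 */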



NONLEAF_ENTRY(lck_mtx_lock)
	mov	%rdi, %rdx		/* fetch lock pointer */

	CHECK_PREEMPTION_LEVEL()

	mov	M_STATE(%rdx), %ecx
	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx	/* is the interlock or mutex held */
	jnz	Llml_slow
Llml_try:				/* no - can't be INDIRECT, DESTROYED or locked */
	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	Llml_busy_disabled

	mov	%gs:CPU_ACTIVE_THREAD, %rax
	mov	%rax, M_OWNER(%rdx)	/* record owner of mutex */
#if	MACH_LDEBUG
	test	%rax, %rax
	jz	1f
	incl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	testl	$(M_WAITERS_MSK), M_STATE(%rdx)
	jz	Llml_finish

	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)

Llml_finish:
	andl	$(~M_ILOCKED_MSK), M_STATE(%rdx)
	PREEMPTION_ENABLE

	cmp	%rdx, %rdi		/* is this an extended mutex */
	jne	2f

	leave
#if	CONFIG_DTRACE
	LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx above */
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %rdx)
#endif
	ret
2:
	leave
#if	CONFIG_DTRACE
	LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx above */
	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %rdx)
#endif
	ret


Llml_slow:
	test	$(M_ILOCKED_MSK), %ecx		/* is the interlock held */
	jz	Llml_contended			/* no, must have been the mutex */

	cmp	$(MUTEX_DESTROYED), %ecx	/* check to see if it's marked destroyed */
	je	lck_mtx_destroyed
	cmp	$(MUTEX_IND), %ecx		/* Is this an indirect mutex? */
	jne	Llml_loop			/* no... must be interlocked */

	LMTX_ENTER_EXTENDED

	mov	M_STATE(%rdx), %ecx
	test	$(M_SPIN_MSK), %ecx
	jz	Llml_loop1

	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
Llml_loop:
	PAUSE
	mov	M_STATE(%rdx), %ecx
Llml_loop1:
	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx
	jz	Llml_try
	test	$(M_MLOCKED_MSK), %ecx
	jnz	Llml_contended			/* mutex owned by someone else, go contend for it */
	jmp	Llml_loop

Llml_busy_disabled:
	PREEMPTION_ENABLE
	jmp	Llml_loop


Llml_contended:
	cmp	%rdx, %rdi		/* is this an extended mutex */
	je	0f
	LMTX_UPDATE_MISS
0:
	LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)

	test	%rax, %rax
	jz	Llml_acquired		/* acquired mutex, interlock held and preemption disabled */

	cmp	$1, %rax		/* check for direct wait status */
	je	2f
	cmp	%rdx, %rdi		/* is this an extended mutex */
	je	2f
	LMTX_UPDATE_DIRECT_WAIT
2:
	mov	M_STATE(%rdx), %ecx
	test	$(M_ILOCKED_MSK), %ecx
	jnz	6f

	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK), %ecx	/* try to take the interlock */

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	5f

	test	$(M_MLOCKED_MSK), %ecx	/* we've got the interlock and */
	jnz	3f
	or	$(M_MLOCKED_MSK), %ecx	/* the mutex is free... grab it directly */
	mov	%ecx, M_STATE(%rdx)

	mov	%gs:CPU_ACTIVE_THREAD, %rax
	mov	%rax, M_OWNER(%rdx)	/* record owner of mutex */
#if	MACH_LDEBUG
	test	%rax, %rax
	jz	1f
	incl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

Llml_acquired:
	testl	$(M_WAITERS_MSK), M_STATE(%rdx)
	jnz	1f
	mov	M_OWNER(%rdx), %rax
	mov	TH_WAS_PROMOTED_ON_WAKEUP(%rax), %eax
	test	%eax, %eax
	jz	Llml_finish
1:
	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
	jmp	Llml_finish

3:					/* interlock held, mutex busy */
	cmp	%rdx, %rdi		/* is this an extended mutex */
	je	4f
	LMTX_UPDATE_WAIT
4:
	LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
	jmp	Llml_contended
5:
	PREEMPTION_ENABLE
6:
	PAUSE
	jmp	2b


NONLEAF_ENTRY(lck_mtx_try_lock_spin_always)
	mov	%rdi, %rdx		/* fetch lock pointer */
	jmp	Llmts_avoid_check

NONLEAF_ENTRY(lck_mtx_try_lock_spin)
	mov	%rdi, %rdx		/* fetch lock pointer */

Llmts_avoid_check:
	mov	M_STATE(%rdx), %ecx
	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx	/* is the interlock or mutex held */
	jnz	Llmts_slow
Llmts_try:				/* no - can't be INDIRECT, DESTROYED or locked */
	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK | M_SPIN_MSK), %ecx

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	Llmts_busy_disabled

	mov	%gs:CPU_ACTIVE_THREAD, %rax
	mov	%rax, M_OWNER(%rdx)	/* record owner of mutex */
#if	MACH_LDEBUG
	test	%rax, %rax
	jz	1f
	incl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	leave

#if	CONFIG_DTRACE
	mov	$1, %rax			/* return success */
	LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx above */
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %rdx)
#endif
	mov	$1, %rax			/* return success */
	ret

Llmts_slow:
	test	$(M_ILOCKED_MSK), %ecx	/* is the interlock held */
	jz	Llmts_fail			/* no, must be held as a mutex */

	cmp	$(MUTEX_DESTROYED), %ecx	/* check to see if it's marked destroyed */
	je	lck_mtx_destroyed
	cmp	$(MUTEX_IND), %ecx		/* Is this an indirect mutex? */
	jne	Llmts_loop1

	LMTX_ENTER_EXTENDED
Llmts_loop:
	PAUSE
	mov	M_STATE(%rdx), %ecx
Llmts_loop1:
	test	$(M_MLOCKED_MSK | M_SPIN_MSK), %ecx
	jnz	Llmts_fail
	test	$(M_ILOCKED_MSK), %ecx
	jz	Llmts_try
	jmp	Llmts_loop

Llmts_busy_disabled:
	PREEMPTION_ENABLE
	jmp	Llmts_loop



NONLEAF_ENTRY(lck_mtx_try_lock)
	mov	%rdi, %rdx		/* fetch lock pointer */

	mov	M_STATE(%rdx), %ecx
	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx	/* is the interlock or mutex held */
	jnz	Llmt_slow
Llmt_try:				/* no - can't be INDIRECT, DESTROYED or locked */
	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	Llmt_busy_disabled

	mov	%gs:CPU_ACTIVE_THREAD, %rax
	mov	%rax, M_OWNER(%rdx)	/* record owner of mutex */
#if	MACH_LDEBUG
	test	%rax, %rax
	jz	1f
	incl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	test	$(M_WAITERS_MSK), %ecx
	jz	0f

	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
0:
	andl	$(~M_ILOCKED_MSK), M_STATE(%rdx)
	PREEMPTION_ENABLE

	leave
#if	CONFIG_DTRACE
	mov	$1, %rax			/* return success */
	/* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
	LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx from above */
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %rdx)
#endif
	mov	$1, %rax			/* return success */
	ret

Llmt_slow:
	test	$(M_ILOCKED_MSK), %ecx	/* is the interlock held */
	jz	Llmt_fail			/* no, must be held as a mutex */

	cmp	$(MUTEX_DESTROYED), %ecx	/* check to see if it's marked destroyed */
	je	lck_mtx_destroyed
	cmp	$(MUTEX_IND), %ecx		/* Is this an indirect mutex? */
	jne	Llmt_loop

	LMTX_ENTER_EXTENDED
Llmt_loop:
	PAUSE
	mov	M_STATE(%rdx), %ecx
Llmt_loop1:
	test	$(M_MLOCKED_MSK | M_SPIN_MSK), %ecx
	jnz	Llmt_fail
	test	$(M_ILOCKED_MSK), %ecx
	jz	Llmt_try
	jmp	Llmt_loop

Llmt_busy_disabled:
	PREEMPTION_ENABLE
	jmp	Llmt_loop


Llmt_fail:
Llmts_fail:
	cmp	%rdx, %rdi			/* is this an extended mutex */
	je	0f
	LMTX_UPDATE_MISS
0:
	xor	%rax, %rax
	NONLEAF_RET



NONLEAF_ENTRY(lck_mtx_convert_spin)
	mov	%rdi, %rdx			/* fetch lock pointer */

	mov	M_STATE(%rdx), %ecx
	cmp	$(MUTEX_IND), %ecx		/* Is this an indirect mutex? */
	jne	0f
	mov	M_PTR(%rdx), %rdx		/* If so, take indirection */
	mov	M_STATE(%rdx), %ecx
0:
	test	$(M_MLOCKED_MSK), %ecx		/* already owned as a mutex, just return */
	jnz	2f
	test	$(M_WAITERS_MSK), %ecx		/* are there any waiters? */
	jz	1f

	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
	mov	M_STATE(%rdx), %ecx
1:
	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK)), %ecx	/* convert from spin version to mutex */
	or	$(M_MLOCKED_MSK), %ecx
	mov	%ecx, M_STATE(%rdx)		/* since I own the interlock, I don't need an atomic update */

	PREEMPTION_ENABLE
2:
	NONLEAF_RET



NONLEAF_ENTRY(lck_mtx_unlock)
	mov	%rdi, %rdx		/* fetch lock pointer */
Llmu_entry:
	mov	M_STATE(%rdx), %ecx
Llmu_prim:
	cmp	$(MUTEX_IND), %ecx	/* Is this an indirect mutex? */
	je	Llmu_ext

Llmu_chktype:
	test	$(M_MLOCKED_MSK), %ecx	/* check for full mutex */
	jz	Llmu_unlock
Llmu_mutex:
	test	$(M_ILOCKED_MSK), %ecx	/* have to wait for interlock to clear */
	jnz	Llmu_busy

	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	and	$(~M_MLOCKED_MSK), %ecx	/* drop mutex */
	or	$(M_ILOCKED_MSK), %ecx	/* pick up interlock */

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	Llmu_busy_disabled	/* branch on failure to spin loop */

Llmu_unlock:
	xor	%rax, %rax
	mov	%rax, M_OWNER(%rdx)
	mov	%rcx, %rax		/* keep original state in %ecx for later evaluation */
	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), %rax

	test	$(M_WAITERS_MSK), %eax
	jz	2f
	dec	%eax			/* decrement waiter count */
2:
	mov	%eax, M_STATE(%rdx)	/* since I own the interlock, I don't need an atomic update */

#if	MACH_LDEBUG
	/* perform lock statistics after drop to prevent delay */
	mov	%gs:CPU_ACTIVE_THREAD, %rax
	test	%rax, %rax
	jz	1f
	decl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	test	$(M_PROMOTED_MSK | M_WAITERS_MSK), %ecx
	jz	3f

	LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, %rcx)
3:
	PREEMPTION_ENABLE

	cmp	%rdx, %rdi
	jne	4f

	leave
#if	CONFIG_DTRACE
	/* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */
	LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx from above */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %rdx)
#endif
	ret
4:
	leave
#if	CONFIG_DTRACE
	/* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
	LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
	ret
	/* inherit lock pointer in %rdx from above */
	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %rdx)
#endif
	ret


Llmu_busy_disabled:
	PREEMPTION_ENABLE
Llmu_busy:
	PAUSE
	mov	M_STATE(%rdx), %ecx
	jmp	Llmu_mutex

Llmu_ext:
	mov	M_PTR(%rdx), %rdx
	mov	M_OWNER(%rdx), %rax
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	CHECK_UNLOCK(%rcx, %rax)
	mov	M_STATE(%rdx), %ecx
	jmp	Llmu_chktype



LEAF_ENTRY(lck_mtx_ilk_unlock)
	mov	%rdi, %rdx		/* fetch lock pointer - no indirection here */

	andl	$(~M_ILOCKED_MSK), M_STATE(%rdx)

	PREEMPTION_ENABLE		/* need to re-enable preemption */

	LEAF_RET



LEAF_ENTRY(lck_mtx_lock_grab_mutex)
	mov	%rdi, %rdx		/* fetch lock pointer - no indirection here */

	mov	M_STATE(%rdx), %ecx

	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx	/* can't have the mutex yet */
	jnz	3f

	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx

	PREEMPTION_DISABLE
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	2f				/* branch on failure to spin loop */

	mov	%gs:CPU_ACTIVE_THREAD, %rax
	mov	%rax, M_OWNER(%rdx)	/* record owner of mutex */
#if	MACH_LDEBUG
	test	%rax, %rax
	jz	1f
	incl	TH_MUTEX_COUNT(%rax)	/* lock statistic */
1:
#endif	/* MACH_LDEBUG */

	mov	$1, %rax		/* return success */
	LEAF_RET
2:
	PREEMPTION_ENABLE
3:
	xor	%rax, %rax	/* return failure */
	LEAF_RET



LEAF_ENTRY(lck_mtx_lock_mark_destroyed)
	mov	%rdi, %rdx
1:
	mov	M_STATE(%rdx), %ecx
	cmp	$(MUTEX_IND), %ecx	/* Is this an indirect mutex? */
	jne	2f

	movl	$(MUTEX_DESTROYED), M_STATE(%rdx)	/* convert to destroyed state */
	jmp	3f
2:
	test	$(M_ILOCKED_MSK), %ecx	/* have to wait for interlock to clear */
	jnz	5f

	PREEMPTION_DISABLE
	mov	%rcx, %rax		/* eax contains snapshot for cmpxchgl */
	or	$(M_ILOCKED_MSK), %ecx
	lock
	cmpxchg %ecx, M_STATE(%rdx)	/* atomic compare and exchange */
	jne	4f			/* branch on failure to spin loop */
	movl	$(MUTEX_DESTROYED), M_STATE(%rdx)	/* convert to destroyed state */
	PREEMPTION_ENABLE
3:
	LEAF_RET			/* return with M_ILOCKED set */
4:
	PREEMPTION_ENABLE
5:
	PAUSE
	jmp	1b

LEAF_ENTRY(preemption_underflow_panic)
	FRAME
	incl	%gs:CPU_PREEMPTION_LEVEL
	ALIGN_STACK()
	LOAD_STRING_ARG0(16f)
	CALL_PANIC()
	hlt
	.data
16:	String	"Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
	.text


LEAF_ENTRY(_disable_preemption)
#if	MACH_RT
	PREEMPTION_DISABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_enable_preemption)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
	ALIGN_STACK()
	LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_enable_preemption_less_than_zero:
	.asciz	"_enable_preemption: preemption_level(%d)  < 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	PREEMPTION_ENABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_enable_preemption_no_check)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
	ALIGN_STACK()
	LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_enable_preemption_no_check_less_than_zero:
	.asciz	"_enable_preemption_no_check: preemption_level <= 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	_ENABLE_PREEMPTION_NO_CHECK
#endif	/* MACH_RT */
	LEAF_RET


LEAF_ENTRY(_mp_disable_preemption)
#if	MACH_RT
	PREEMPTION_DISABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_mp_enable_preemption)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
	ALIGN_STACK()
	LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_mp_enable_preemption_less_than_zero:
	.asciz "_mp_enable_preemption: preemption_level (%d) <= 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	PREEMPTION_ENABLE
#endif	/* MACH_RT */
	LEAF_RET

LEAF_ENTRY(_mp_enable_preemption_no_check)
#if	MACH_RT
#if	MACH_ASSERT
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
	jg	1f
	ALIGN_STACK()
	LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
	CALL_PANIC()
	hlt
	.cstring
_mp_enable_preemption_no_check_less_than_zero:
	.asciz "_mp_enable_preemption_no_check: preemption_level <= 0!"
	.text
1:
#endif	/* MACH_ASSERT */
	_ENABLE_PREEMPTION_NO_CHECK
#endif	/* MACH_RT */
	LEAF_RET

/*
 * Atomic primitives, prototyped in kern/simple_lock.h
 */
LEAF_ENTRY(hw_atomic_add)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	movl	%esi, %eax		/* Load addend */
	lock	xaddl %eax, (%rdi)	/* Atomic exchange and add */
	addl	%esi, %eax		/* Calculate result */
	LEAF_RET
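
/*
 * Equivalent C semantics (sketch):
 *
 *	uint32_t hw_atomic_add(volatile uint32_t *dst, uint32_t delt) {
 *		return __sync_add_and_fetch(dst, delt);	// new value
 *	}
 */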

LEAF_ENTRY(hw_atomic_sub)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	negl	%esi
	movl	%esi, %eax
	lock	xaddl %eax, (%rdi)	/* Atomic exchange and add */
	addl	%esi, %eax		/* Calculate result */
	LEAF_RET

LEAF_ENTRY(hw_atomic_or)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	movl	(%rdi), %eax
1:
	movl	%esi, %edx		/* Load mask */
	orl	%eax, %edx
	lock	cmpxchgl %edx, (%rdi)	/* Atomic CAS */
	jne	1b
	movl	%edx, %eax		/* Result */
	LEAF_RET
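
/*
 * Equivalent C semantics (sketch):
 *
 *	uint32_t hw_atomic_or(volatile uint32_t *dst, uint32_t mask) {
 *		return __sync_or_and_fetch(dst, mask);	// new value
 *	}
 */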
/*
 * A variant of hw_atomic_or which doesn't return a value.
 * The implementation is thus comparatively more efficient.
 */

LEAF_ENTRY(hw_atomic_or_noret)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	lock
	orl	%esi, (%rdi)		/* Atomic OR */
	LEAF_RET


LEAF_ENTRY(hw_atomic_and)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	movl	(%rdi), %eax
1:
	movl	%esi, %edx		/* Load mask */
	andl	%eax, %edx
	lock	cmpxchgl %edx, (%rdi)	/* Atomic CAS */
	jne	1b
	movl	%edx, %eax		/* Result */
	LEAF_RET
/*
 * A variant of hw_atomic_and which doesn't return a value.
 * The implementation is thus comparatively more efficient.
 */

LEAF_ENTRY(hw_atomic_and_noret)
#if	MACH_LDEBUG
	test	$3, %rdi
	jz	1f
	ud2
1:
#endif
	lock	andl	%esi, (%rdi)	/* Atomic AND */
	LEAF_RET
