1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 *	File:	kern/lock.c
58 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
59 *	Date:	1985
60 *
61 *	Locking primitives implementation
62 */
63
64#include <mach_kdb.h>
65#include <mach_ldebug.h>
66
67#include <kern/kalloc.h>
68#include <kern/lock.h>
69#include <kern/locks.h>
70#include <kern/misc_protos.h>
71#include <kern/thread.h>
72#include <kern/processor.h>
73#include <kern/sched_prim.h>
74#include <kern/xpr.h>
75#include <kern/debug.h>
76#include <string.h>
77
78#if	MACH_KDB
79#include <ddb/db_command.h>
80#include <ddb/db_output.h>
81#include <ddb/db_sym.h>
82#include <ddb/db_print.h>
83#endif	/* MACH_KDB */
84
85#ifdef __ppc__
86#include <ppc/Firmware.h>
87#endif
88
89#include <sys/kdebug.h>
90
91/*
92 * We need only enough declarations from the BSD-side to be able to
93 * test if our probe is active, and to call __dtrace_probe().  Setting
94 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
95 *
96 * Note that if CONFIG_DTRACE is off, the include file below stubs out
97 * the code hooks here.
98 */
99#if	CONFIG_DTRACE
100#define NEED_DTRACE_DEFS
101#include <../bsd/sys/lockstat.h>
102#endif
103
104#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
105#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
106#define	LCK_RW_LCK_SHARED_CODE		0x102
107#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
108#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
109#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105
110
111
112#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
113
114unsigned int lock_wait_time[2] = { (unsigned int)-1, 0 } ;
115
116/* Forwards */
117
118
119#if	USLOCK_DEBUG
120/*
121 *	Perform simple lock checks.
122 */
123int	uslock_check = 1;
124int	max_lock_loops	= 100000000;
125decl_simple_lock_data(extern , printf_lock)
126decl_simple_lock_data(extern , panic_lock)
127#if	MACH_KDB
128decl_simple_lock_data(extern , kdb_lock)
129#endif	/* MACH_KDB */
130#endif	/* USLOCK_DEBUG */
131
132
133/*
134 *	We often want to know the addresses of the callers
135 *	of the various lock routines.  However, this information
136 *	is only used for debugging and statistics.
137 */
138typedef void	*pc_t;
139#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
140#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)
141#if	ANY_LOCK_DEBUG
142#define	OBTAIN_PC(pc,l)	((pc) = (void *) GET_RETURN_PC(&(l)))
143#else	/* ANY_LOCK_DEBUG */
144#ifdef	lint
145/*
146 *	Eliminate lint complaints about unused local pc variables.
147 */
148#define	OBTAIN_PC(pc,l)	++pc
149#else	/* lint */
150#define	OBTAIN_PC(pc,l)
151#endif	/* lint */
152#endif	/* USLOCK_DEBUG */
153
154
155/*
156 *	Portable lock package implementation of usimple_locks.
157 */
158
159#if	USLOCK_DEBUG
160#define	USLDBG(stmt)	stmt
161void		usld_lock_init(usimple_lock_t, unsigned short);
162void		usld_lock_pre(usimple_lock_t, pc_t);
163void		usld_lock_post(usimple_lock_t, pc_t);
164void		usld_unlock(usimple_lock_t, pc_t);
165void		usld_lock_try_pre(usimple_lock_t, pc_t);
166void		usld_lock_try_post(usimple_lock_t, pc_t);
167int		usld_lock_common_checks(usimple_lock_t, const char *);
168#else	/* USLOCK_DEBUG */
169#define	USLDBG(stmt)
170#endif	/* USLOCK_DEBUG */
171
172/*
173 *      Routine:        lck_spin_alloc_init
174 */
175lck_spin_t *
176lck_spin_alloc_init(
177	lck_grp_t	*grp,
178	lck_attr_t	*attr) {
179	lck_spin_t	*lck;
180
181	if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
182		lck_spin_init(lck, grp, attr);
183
184	return(lck);
185}
186
187/*
188 *      Routine:        lck_spin_free
189 */
190void
191lck_spin_free(
192	lck_spin_t	*lck,
193	lck_grp_t	*grp) {
194	lck_spin_destroy(lck, grp);
195	kfree((void *)lck, sizeof(lck_spin_t));
196}
197
198/*
199 *      Routine:        lck_spin_init
200 */
201void
202lck_spin_init(
203	lck_spin_t		*lck,
204	lck_grp_t		*grp,
205	__unused lck_attr_t	*attr) {
206
207	lck->interlock = 0;
208	lck_grp_reference(grp);
209	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
210}
211
212/*
213 *      Routine:        lck_spin_destroy
214 */
215void
216lck_spin_destroy(
217	lck_spin_t	*lck,
218	lck_grp_t	*grp) {
219	if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
220		return;
221	lck->interlock = LCK_SPIN_TAG_DESTROYED;
222	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
223	lck_grp_deallocate(grp);
224}
225
226/*
227 *	Initialize a usimple_lock.
228 *
229 *	No change in preemption state.
230 */
231void
232usimple_lock_init(
233	usimple_lock_t	l,
234	unsigned short	tag)
235{
236#ifndef	MACHINE_SIMPLE_LOCK
237	USLDBG(usld_lock_init(l, tag));
238	hw_lock_init(&l->interlock);
239#else
240	simple_lock_init((simple_lock_t)l,tag);
241#endif
242}
243
244
245/*
246 *	Acquire a usimple_lock.
247 *
248 *	Returns with preemption disabled.  Note
249 *	that the hw_lock routines are responsible for
250 *	maintaining preemption state.
251 */
252void
253usimple_lock(
254	usimple_lock_t	l)
255{
256#ifndef	MACHINE_SIMPLE_LOCK
257	pc_t		pc;
258
259	OBTAIN_PC(pc, l);
260	USLDBG(usld_lock_pre(l, pc));
261
262	if(!hw_lock_to(&l->interlock, LockTimeOut))	/* Try to get the lock with a timeout */
263		panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", l, cpu_number(), pc);
264
265	USLDBG(usld_lock_post(l, pc));
266#else
267	simple_lock((simple_lock_t)l);
268#endif
269}
270
271
272/*
273 *	Release a usimple_lock.
274 *
275 *	Returns with preemption enabled.  Note
276 *	that the hw_lock routines are responsible for
277 *	maintaining preemption state.
278 */
279void
280usimple_unlock(
281	usimple_lock_t	l)
282{
283#ifndef	MACHINE_SIMPLE_LOCK
284	pc_t	pc;
285
286	OBTAIN_PC(pc, l);
287	USLDBG(usld_unlock(l, pc));
288	sync();
289	hw_lock_unlock(&l->interlock);
290#else
291	simple_unlock_rwmb((simple_lock_t)l);
292#endif
293}
294
295
296/*
297 *	Conditionally acquire a usimple_lock.
298 *
299 *	On success, returns with preemption disabled.
300 *	On failure, returns with preemption in the same state
301 *	as when first invoked.  Note that the hw_lock routines
302 *	are responsible for maintaining preemption state.
303 *
304 *	XXX No stats are gathered on a miss; I preserved this
305 *	behavior from the original assembly-language code, but
306 *	doesn't it make sense to log misses?  XXX
307 */
308unsigned int
309usimple_lock_try(
310	usimple_lock_t	l)
311{
312#ifndef	MACHINE_SIMPLE_LOCK
313	pc_t		pc;
314	unsigned int	success;
315
316	OBTAIN_PC(pc, l);
317	USLDBG(usld_lock_try_pre(l, pc));
318	success = hw_lock_try(&l->interlock);
319	if (success)
320		USLDBG(usld_lock_try_post(l, pc));
321	return success;
322#else
323	return(simple_lock_try((simple_lock_t)l));
324#endif
325}
326
327#if	USLOCK_DEBUG
328/*
329 *	States of a usimple_lock.  The default when initializing
330 *	a usimple_lock is setting it up for debug checking.
331 */
332#define	USLOCK_CHECKED		0x0001		/* lock is being checked */
333#define	USLOCK_TAKEN		0x0002		/* lock has been taken */
334#define	USLOCK_INIT		0xBAA0		/* lock has been initialized */
335#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
336#define	USLOCK_CHECKING(l)	(uslock_check &&			\
337				 ((l)->debug.state & USLOCK_CHECKED))
338
339/*
340 *	Trace activities of a particularly interesting lock.
341 */
342void	usl_trace(usimple_lock_t, int, pc_t, const char *);
343
344
345/*
346 *	Initialize the debugging information contained
347 *	in a usimple_lock.
348 */
349void
350usld_lock_init(
351	usimple_lock_t	l,
352	__unused unsigned short	tag)
353{
354	if (l == USIMPLE_LOCK_NULL)
355		panic("lock initialization:  null lock pointer");
356	l->lock_type = USLOCK_TAG;
357	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
358	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
359	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
360	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
361	l->debug.duration[0] = l->debug.duration[1] = 0;
362	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
363	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
364	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
365}
366
367
368/*
369 *	These checks apply to all usimple_locks, not just
370 *	those with USLOCK_CHECKED turned on.
371 */
372int
373usld_lock_common_checks(usimple_lock_t l, const char *caller)
374{
375	if (l == USIMPLE_LOCK_NULL)
376		panic("%s:  null lock pointer", caller);
377	if (l->lock_type != USLOCK_TAG)
378		panic("%s:  0x%x is not a usimple lock", caller, (integer_t) l);
379	if (!(l->debug.state & USLOCK_INIT))
380		panic("%s:  0x%x is not an initialized lock",
381		      caller, (integer_t) l);
382	return USLOCK_CHECKING(l);
383}
384
385
386/*
387 *	Debug checks on a usimple_lock just before attempting
388 *	to acquire it.
389 */
390/* ARGSUSED */
391void
392usld_lock_pre(
393	usimple_lock_t	l,
394	pc_t		pc)
395{
396	const char *caller = "usimple_lock";
397
398	if (!usld_lock_common_checks(l, caller))
399		return;
400
401/*
402 *	Note that we have a weird case where we are getting a lock when we are]
403 *	in the process of putting the system to sleep. We are running with no
404 *	current threads, therefore we can't tell if we are trying to retake a lock
405 *	we have or someone on the other processor has it.  Therefore we just
406 *	ignore this test if the locking thread is 0.
407 */
408
409	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
410	    l->debug.lock_thread == (void *) current_thread()) {
411		printf("%s:  lock 0x%x already locked (at %p) by",
412		      caller, (integer_t) l, l->debug.lock_pc);
413		printf(" current thread %p (new attempt at pc %p)\n",
414		       l->debug.lock_thread, pc);
415		panic("%s", caller);
416	}
417	mp_disable_preemption();
418	usl_trace(l, cpu_number(), pc, caller);
419	mp_enable_preemption();
420}
421
422
423/*
424 *	Debug checks on a usimple_lock just after acquiring it.
425 *
426 *	Pre-emption has been disabled at this point,
427 *	so we are safe in using cpu_number.
428 */
429void
430usld_lock_post(
431	usimple_lock_t	l,
432	pc_t		pc)
433{
434	int mycpu;
435	const char *caller = "successful usimple_lock";
436
437
438	if (!usld_lock_common_checks(l, caller))
439		return;
440
441	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
442		panic("%s:  lock 0x%x became uninitialized",
443		      caller, (integer_t) l);
444	if ((l->debug.state & USLOCK_TAKEN))
445		panic("%s:  lock 0x%x became TAKEN by someone else",
446		      caller, (integer_t) l);
447
448	mycpu = cpu_number();
449	l->debug.lock_thread = (void *)current_thread();
450	l->debug.state |= USLOCK_TAKEN;
451	l->debug.lock_pc = pc;
452	l->debug.lock_cpu = mycpu;
453
454	usl_trace(l, mycpu, pc, caller);
455}
456
457
458/*
459 *	Debug checks on a usimple_lock just before
460 *	releasing it.  Note that the caller has not
461 *	yet released the hardware lock.
462 *
463 *	Preemption is still disabled, so there's
464 *	no problem using cpu_number.
465 */
466void
467usld_unlock(
468	usimple_lock_t	l,
469	pc_t		pc)
470{
471	int mycpu;
472	const char *caller = "usimple_unlock";
473
474
475	if (!usld_lock_common_checks(l, caller))
476		return;
477
478	mycpu = cpu_number();
479
480	if (!(l->debug.state & USLOCK_TAKEN))
481		panic("%s:  lock 0x%x hasn't been taken",
482		      caller, (integer_t) l);
483	if (l->debug.lock_thread != (void *) current_thread())
484		panic("%s:  unlocking lock 0x%x, owned by thread %p",
485		      caller, (integer_t) l, l->debug.lock_thread);
486	if (l->debug.lock_cpu != mycpu) {
487		printf("%s:  unlocking lock 0x%x on cpu 0x%x",
488		       caller, (integer_t) l, mycpu);
489		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
490		panic("%s", caller);
491	}
492	usl_trace(l, mycpu, pc, caller);
493
494	l->debug.unlock_thread = l->debug.lock_thread;
495	l->debug.lock_thread = INVALID_PC;
496	l->debug.state &= ~USLOCK_TAKEN;
497	l->debug.unlock_pc = pc;
498	l->debug.unlock_cpu = mycpu;
499}
500
501
502/*
503 *	Debug checks on a usimple_lock just before
504 *	attempting to acquire it.
505 *
506 *	Preemption isn't guaranteed to be disabled.
507 */
508void
509usld_lock_try_pre(
510	usimple_lock_t	l,
511	pc_t		pc)
512{
513	const char *caller = "usimple_lock_try";
514
515	if (!usld_lock_common_checks(l, caller))
516		return;
517	mp_disable_preemption();
518	usl_trace(l, cpu_number(), pc, caller);
519	mp_enable_preemption();
520}
521
522
523/*
524 *	Debug checks on a usimple_lock just after
525 *	successfully attempting to acquire it.
526 *
527 *	Preemption has been disabled by the
528 *	lock acquisition attempt, so it's safe
529 *	to use cpu_number.
530 */
531void
532usld_lock_try_post(
533	usimple_lock_t	l,
534	pc_t		pc)
535{
536	int mycpu;
537	const char *caller = "successful usimple_lock_try";
538
539	if (!usld_lock_common_checks(l, caller))
540		return;
541
542	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
543		panic("%s:  lock 0x%x became uninitialized",
544		      caller, (integer_t) l);
545	if ((l->debug.state & USLOCK_TAKEN))
546		panic("%s:  lock 0x%x became TAKEN by someone else",
547		      caller, (integer_t) l);
548
549	mycpu = cpu_number();
550	l->debug.lock_thread = (void *) current_thread();
551	l->debug.state |= USLOCK_TAKEN;
552	l->debug.lock_pc = pc;
553	l->debug.lock_cpu = mycpu;
554
555	usl_trace(l, mycpu, pc, caller);
556}
557
558
559/*
560 *	For very special cases, set traced_lock to point to a
561 *	specific lock of interest.  The result is a series of
562 *	XPRs showing lock operations on that lock.  The lock_seq
563 *	value is used to show the order of those operations.
564 */
565usimple_lock_t		traced_lock;
566unsigned int		lock_seq;
567
568void
569usl_trace(
570	usimple_lock_t	l,
571	int		mycpu,
572	pc_t		pc,
573	const char *	op_name)
574{
575	if (traced_lock == l) {
576		XPR(XPR_SLOCK,
577		    "seq %d, cpu %d, %s @ %x\n",
578		    (integer_t) lock_seq, (integer_t) mycpu,
579		    (integer_t) op_name, (integer_t) pc, 0);
580		lock_seq++;
581	}
582}
583
584
585#endif	/* USLOCK_DEBUG */
586
587/*
588 * The C portion of the shared/exclusive locks package.
589 */
590
591/*
592 * Forward definition
593 */
594
595void lck_rw_lock_exclusive_gen(
596	lck_rw_t	*lck);
597
598lck_rw_type_t lck_rw_done_gen(
599	lck_rw_t	*lck);
600
601void
602lck_rw_lock_shared_gen(
603	lck_rw_t	*lck);
604
605boolean_t
606lck_rw_lock_shared_to_exclusive_gen(
607	lck_rw_t	*lck);
608
609void
610lck_rw_lock_exclusive_to_shared_gen(
611	lck_rw_t	*lck);
612
613boolean_t
614lck_rw_try_lock_exclusive_gen(
615	lck_rw_t	*lck);
616
617boolean_t
618lck_rw_try_lock_shared_gen(
619	lck_rw_t	*lck);
620
621void lck_rw_ext_init(
622	lck_rw_ext_t	*lck,
623	lck_grp_t	*grp,
624	lck_attr_t	*attr);
625
626void lck_rw_ext_backtrace(
627	lck_rw_ext_t	*lck);
628
629void lck_rw_lock_exclusive_ext(
630	lck_rw_ext_t	*lck,
631	lck_rw_t	*rlck);
632
633lck_rw_type_t lck_rw_done_ext(
634	lck_rw_ext_t	*lck,
635	lck_rw_t	*rlck);
636
637void
638lck_rw_lock_shared_ext(
639	lck_rw_ext_t	*lck,
640	lck_rw_t	*rlck);
641
642boolean_t
643lck_rw_lock_shared_to_exclusive_ext(
644	lck_rw_ext_t	*lck,
645	lck_rw_t	*rlck);
646
647void
648lck_rw_lock_exclusive_to_shared_ext(
649	lck_rw_ext_t	*lck,
650	lck_rw_t	*rlck);
651
652boolean_t
653lck_rw_try_lock_exclusive_ext(
654	lck_rw_ext_t	*lck,
655	lck_rw_t	*rlck);
656
657boolean_t
658lck_rw_try_lock_shared_ext(
659	lck_rw_ext_t	*lck,
660	lck_rw_t	*rlck);
661
662void
663lck_rw_ilk_lock(
664	lck_rw_t	*lck);
665
666void
667lck_rw_ilk_unlock(
668	lck_rw_t	*lck);
669
670void
671lck_rw_check_type(
672	lck_rw_ext_t	*lck,
673	lck_rw_t	*rlck);
674
675void
676lck_rw_assert_ext(
677	lck_rw_ext_t	*lck,
678	lck_rw_t	*rlck,
679	unsigned int	type);
680
681/*
682 *	Routine:	lock_alloc
683 *	Function:
684 *		Allocate a lock for external users who cannot
685 *		hard-code the structure definition into their
686 *		objects.
687 *		For now just use kalloc, but a zone is probably
688 *		warranted.
689 */
690lock_t *
691lock_alloc(
692	boolean_t		can_sleep,
693	__unused unsigned short	tag,
694	__unused unsigned short	tag1)
695{
696	lock_t		*lck;
697
698	if ((lck = (lock_t *)kalloc(sizeof(lock_t))) != 0)
699	  lock_init(lck, can_sleep, tag, tag1);
700	return(lck);
701}
702
703/*
704 *	Routine:	lock_init
705 *	Function:
706 *		Initialize a lock; required before use.
707 *		Note that clients declare the "struct lock"
708 *		variables and then initialize them, rather
709 *		than getting a new one from this module.
710 */
711void
712lock_init(
713	lock_t			*lck,
714	boolean_t		can_sleep,
715	__unused unsigned short	tag,
716	__unused unsigned short	tag1)
717{
718	if (!can_sleep)
719		panic("lock_init: sleep mode must be set to TRUE\n");
720
721	(void) memset((void *) lck, 0, sizeof(lock_t));
722#if	MACH_LDEBUG
723	lck->lck_rw_deb.type = RW_TAG;
724	lck->lck_rw_attr |= (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD|LCK_RW_ATTR_DIS_MYLOCK);
725	lck->lck_rw.lck_rw_priv_excl = TRUE;
726#else
727	lck->lck_rw_priv_excl = TRUE;
728#endif
729
730}
731
732
733/*
734 *	Routine:	lock_free
735 *	Function:
736 *		Free a lock allocated for external users.
737 *		For now just use kfree, but a zone is probably
738 *		warranted.
739 */
740void
741lock_free(
742	lock_t	*lck)
743{
744	kfree((void *)lck, sizeof(lock_t));
745}
746
747#if	MACH_LDEBUG
748void
749lock_write(
750	lock_t	*lck)
751{
752	lck_rw_lock_exclusive_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
753}
754
755void
756lock_done(
757	lock_t	*lck)
758{
759	(void)lck_rw_done_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
760}
761
762void
763lock_read(
764	lock_t	*lck)
765{
766	lck_rw_lock_shared_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
767}
768
769boolean_t
770lock_read_to_write(
771	lock_t	*lck)
772{
773	return(lck_rw_lock_shared_to_exclusive_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck));
774}
775
776void
777lock_write_to_read(
778	register lock_t	*lck)
779{
780	lck_rw_lock_exclusive_to_shared_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
781}
782#endif
783
784/*
785 *      Routine:        lck_rw_alloc_init
786 */
787lck_rw_t *
788lck_rw_alloc_init(
789	lck_grp_t	*grp,
790	lck_attr_t	*attr) {
791	lck_rw_t	*lck;
792
793	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
794		lck_rw_init(lck, grp, attr);
795
796	return(lck);
797}
798
799/*
800 *      Routine:        lck_rw_free
801 */
802void
803lck_rw_free(
804	lck_rw_t	*lck,
805	lck_grp_t	*grp) {
806	lck_rw_destroy(lck, grp);
807	kfree((void *)lck, sizeof(lck_rw_t));
808}
809
810/*
811 *      Routine:        lck_rw_init
812 */
813void
814lck_rw_init(
815	lck_rw_t		*lck,
816	lck_grp_t		*grp,
817	lck_attr_t		*attr) {
818	lck_rw_ext_t	*lck_ext;
819	lck_attr_t	*lck_attr;
820
821	if (attr != LCK_ATTR_NULL)
822		lck_attr = attr;
823	else
824		lck_attr = &LockDefaultLckAttr;
825
826	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
827		if ((lck_ext = (lck_rw_ext_t *)kalloc(sizeof(lck_rw_ext_t))) != 0) {
828			lck_rw_ext_init(lck_ext, grp, lck_attr);
829			lck->lck_rw_tag = LCK_RW_TAG_INDIRECT;
830			lck->lck_rw_ptr = lck_ext;
831		}
832	} else {
833		(void) memset((void *) lck, 0, sizeof(lck_rw_t));
834		if ((lck_attr->lck_attr_val)  & LCK_ATTR_RW_SHARED_PRIORITY)
835			lck->lck_rw_priv_excl = FALSE;
836		else
837			lck->lck_rw_priv_excl = TRUE;
838	}
839
840	lck_grp_reference(grp);
841	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
842}
843
844/*
845 *      Routine:        lck_rw_ext_init
846 */
847void
848lck_rw_ext_init(
849	lck_rw_ext_t	*lck,
850	lck_grp_t	*grp,
851	lck_attr_t	*attr) {
852
853	bzero((void *)lck, sizeof(lck_rw_ext_t));
854	if ((attr->lck_attr_val)  & LCK_ATTR_RW_SHARED_PRIORITY)
855		lck->lck_rw.lck_rw_priv_excl = FALSE;
856	else
857		lck->lck_rw.lck_rw_priv_excl = TRUE;
858
859	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
860		lck->lck_rw_deb.type = RW_TAG;
861		lck->lck_rw_attr |= LCK_RW_ATTR_DEBUG;
862	}
863
864	lck->lck_rw_grp = grp;
865
866	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
867		 lck->lck_rw_attr |= LCK_RW_ATTR_STAT;
868}
869
870/*
871 *      Routine:        lck_rw_destroy
872 */
873void
874lck_rw_destroy(
875	lck_rw_t	*lck,
876	lck_grp_t	*grp) {
877	boolean_t lck_is_indirect;
878
879	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
880		return;
881	lck_is_indirect = (lck->lck_rw_tag == LCK_RW_TAG_INDIRECT);
882	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
883	if (lck_is_indirect)
884		kfree((void *)lck->lck_rw_ptr, sizeof(lck_rw_ext_t));
885
886	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
887	lck_grp_deallocate(grp);
888	return;
889}
890
891/*
892 *	Routine:	lck_rw_lock
893 */
894void
895lck_rw_lock(
896	lck_rw_t	*lck,
897	lck_rw_type_t	lck_rw_type)
898{
899	if (lck_rw_type == LCK_RW_TYPE_SHARED)
900		lck_rw_lock_shared(lck);
901	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
902		lck_rw_lock_exclusive(lck);
903	else
904		panic("lck_rw_lock(): Invalid RW lock type: %d\n", lck_rw_type);
905}
906
907
908/*
909 *	Routine:	lck_rw_unlock
910 */
911void
912lck_rw_unlock(
913	lck_rw_t	*lck,
914	lck_rw_type_t	lck_rw_type)
915{
916	if (lck_rw_type == LCK_RW_TYPE_SHARED)
917		lck_rw_unlock_shared(lck);
918	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
919		lck_rw_unlock_exclusive(lck);
920	else
921		panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
922}
923
924
925/*
926 *	Routine:	lck_rw_unlock_shared
927 */
928void
929lck_rw_unlock_shared(
930	lck_rw_t	*lck)
931{
932	lck_rw_type_t	ret;
933
934	ret = lck_rw_done(lck);
935
936	if (ret != LCK_RW_TYPE_SHARED)
937		panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
938}
939
940
941/*
942 *	Routine:	lck_rw_unlock_exclusive
943 */
944void
945lck_rw_unlock_exclusive(
946	lck_rw_t	*lck)
947{
948	lck_rw_type_t	ret;
949
950	ret = lck_rw_done(lck);
951
952	if (ret != LCK_RW_TYPE_EXCLUSIVE)
953		panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
954}
955
956
957/*
958 *      Routine:        lck_rw_try_lock
959 */
960boolean_t
961lck_rw_try_lock(
962	lck_rw_t	*lck,
963	lck_rw_type_t	lck_rw_type)
964{
965	if (lck_rw_type == LCK_RW_TYPE_SHARED)
966		return(lck_rw_try_lock_shared(lck));
967	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
968		return(lck_rw_try_lock_exclusive(lck));
969	else
970		panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
971	return(FALSE);
972}
973
974
975
976/*
977 *      Routine:        lck_rw_lock_exclusive_gen
978 */
979void
980lck_rw_lock_exclusive_gen(
981	lck_rw_t	*lck)
982{
983	int	   i;
984	wait_result_t	res;
985#if	CONFIG_DTRACE
986	uint64_t wait_interval = 0;
987	int slept = 0;
988	int readers_at_sleep;
989#endif
990
991	lck_rw_ilk_lock(lck);
992#if	CONFIG_DTRACE
993	readers_at_sleep = lck->lck_rw_shared_cnt;
994#endif
995
996	/*
997	 *	Try to acquire the lck_rw_want_excl bit.
998	 */
999	while (lck->lck_rw_want_excl) {
1000		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1001
1002#if	CONFIG_DTRACE
1003		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
1004			wait_interval = mach_absolute_time();
1005		} else {
1006			wait_interval = -1;
1007		}
1008#endif
1009
1010		i = lock_wait_time[1];
1011		if (i != 0) {
1012			lck_rw_ilk_unlock(lck);
1013			while (--i != 0 && lck->lck_rw_want_excl)
1014				continue;
1015			lck_rw_ilk_lock(lck);
1016		}
1017
1018		if (lck->lck_rw_want_excl) {
1019			lck->lck_rw_waiting = TRUE;
1020			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1021			if (res == THREAD_WAITING) {
1022				lck_rw_ilk_unlock(lck);
1023				res = thread_block(THREAD_CONTINUE_NULL);
1024#if	CONFIG_DTRACE
1025				slept = 1;
1026#endif
1027				lck_rw_ilk_lock(lck);
1028			}
1029		}
1030		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0);
1031	}
1032	lck->lck_rw_want_excl = TRUE;
1033
1034	/* Wait for readers (and upgrades) to finish */
1035
1036	while ((lck->lck_rw_shared_cnt != 0) || lck->lck_rw_want_upgrade) {
1037
1038		i = lock_wait_time[1];
1039
1040		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
1041			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, i, 0);
1042#if	CONFIG_DTRACE
1043		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
1044			wait_interval = mach_absolute_time();
1045		} else {
1046			wait_interval = (unsigned) -1;
1047		}
1048#endif
1049
1050		if (i != 0) {
1051			lck_rw_ilk_unlock(lck);
1052			while (--i != 0 && (lck->lck_rw_shared_cnt != 0 ||
1053					    lck->lck_rw_want_upgrade))
1054				continue;
1055			lck_rw_ilk_lock(lck);
1056		}
1057
1058		if (lck->lck_rw_shared_cnt != 0 || lck->lck_rw_want_upgrade) {
1059			lck->lck_rw_waiting = TRUE;
1060			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1061			if (res == THREAD_WAITING) {
1062				lck_rw_ilk_unlock(lck);
1063				res = thread_block(THREAD_CONTINUE_NULL);
1064#if	CONFIG_DTRACE
1065				slept = 1;
1066#endif
1067				lck_rw_ilk_lock(lck);
1068			}
1069		}
1070		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
1071			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, res, 0);
1072	}
1073
1074	lck_rw_ilk_unlock(lck);
1075#if	CONFIG_DTRACE
1076	/*
1077	 * Decide what latencies we suffered that are Dtrace events.
1078	 * If we have set wait_interval, then we either spun or slept.
1079	 * At least we get out from under the interlock before we record
1080	 * which is the best we can do here to minimize the impact
1081	 * of the tracing.
1082	 */
1083	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1084		if (slept == 0) {
1085			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1086			    mach_absolute_time() - wait_interval, 1);
1087		} else {
1088			/*
1089			 * For the blocking case, we also record if when we blocked
1090			 * it was held for read or write, and how many readers.
1091			 * Notice that above we recorded this before we dropped
1092			 * the interlock so the count is accurate.
1093			 */
1094			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1095			    mach_absolute_time() - wait_interval, 1,
1096			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1097		}
1098	}
1099	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1100#endif
1101}
1102
1103
1104/*
1105 *      Routine:        lck_rw_done_gen
1106 */
1107lck_rw_type_t
1108lck_rw_done_gen(
1109	lck_rw_t	*lck)
1110{
1111	boolean_t	do_wakeup = FALSE;
1112	lck_rw_type_t	lck_rw_type;
1113
1114
1115	lck_rw_ilk_lock(lck);
1116
1117	if (lck->lck_rw_shared_cnt != 0) {
1118		lck_rw_type = LCK_RW_TYPE_SHARED;
1119		lck->lck_rw_shared_cnt--;
1120	}
1121	else {
1122		lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1123		if (lck->lck_rw_want_upgrade)
1124			lck->lck_rw_want_upgrade = FALSE;
1125		else
1126			lck->lck_rw_want_excl = FALSE;
1127	}
1128
1129	/*
1130	 *	There is no reason to wakeup a lck_rw_waiting thread
1131	 *	if the read-count is non-zero.  Consider:
1132	 *		we must be dropping a read lock
1133	 *		threads are waiting only if one wants a write lock
1134	 *		if there are still readers, they can't proceed
1135	 */
1136
1137	if (lck->lck_rw_waiting && (lck->lck_rw_shared_cnt == 0)) {
1138		lck->lck_rw_waiting = FALSE;
1139		do_wakeup = TRUE;
1140	}
1141
1142	lck_rw_ilk_unlock(lck);
1143
1144	if (do_wakeup)
1145		thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
1146	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lck_rw_type);
1147	return(lck_rw_type);
1148}
1149
1150
1151/*
1152 *	Routine:	lck_rw_lock_shared_gen
1153 */
1154void
1155lck_rw_lock_shared_gen(
1156	lck_rw_t	*lck)
1157{
1158	int		i;
1159	wait_result_t      res;
1160#if	CONFIG_DTRACE
1161	uint64_t wait_interval = 0;
1162	int slept = 0;
1163	int readers_at_sleep;
1164#endif
1165
1166	lck_rw_ilk_lock(lck);
1167#if	CONFIG_DTRACE
1168	readers_at_sleep = lck->lck_rw_shared_cnt;
1169#endif
1170
1171	while ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1172	        ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) {
1173		i = lock_wait_time[1];
1174
1175		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
1176			     (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, i, 0);
1177#if	CONFIG_DTRACE
1178		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
1179			wait_interval = mach_absolute_time();
1180		} else {
1181			wait_interval = (unsigned) -1;
1182		}
1183#endif
1184
1185		if (i != 0) {
1186			lck_rw_ilk_unlock(lck);
1187			while (--i != 0 &&
1188			       (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1189			       ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl)))
1190				continue;
1191			lck_rw_ilk_lock(lck);
1192		}
1193
1194		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1195		    ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) {
1196			lck->lck_rw_waiting = TRUE;
1197			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1198			if (res == THREAD_WAITING) {
1199				lck_rw_ilk_unlock(lck);
1200				res = thread_block(THREAD_CONTINUE_NULL);
1201#if	CONFIG_DTRACE
1202				slept = 1;
1203#endif
1204				lck_rw_ilk_lock(lck);
1205			}
1206		}
1207		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
1208			     (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, res, 0);
1209	}
1210
1211	lck->lck_rw_shared_cnt++;
1212
1213	lck_rw_ilk_unlock(lck);
1214#if	CONFIG_DTRACE
1215	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1216		if (slept == 0) {
1217			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1218		} else {
1219			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1220			    mach_absolute_time() - wait_interval, 0,
1221			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1222		}
1223	}
1224	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1225#endif
1226}
1227
1228
1229/*
1230 *	Routine:	lck_rw_lock_shared_to_exclusive_gen
1231 *	Function:
1232 *		Improves a read-only lock to one with
1233 *		write permission.  If another reader has
1234 *		already requested an upgrade to a write lock,
1235 *		no lock is held upon return.
1236 *
1237 *		Returns FALSE if the upgrade *failed*.
1238 */
1239
1240boolean_t
1241lck_rw_lock_shared_to_exclusive_gen(
1242	lck_rw_t	*lck)
1243{
1244	int	    i;
1245	boolean_t	    do_wakeup = FALSE;
1246	wait_result_t      res;
1247#if	CONFIG_DTRACE
1248	uint64_t wait_interval = 0;
1249	int slept = 0;
1250	int readers_at_sleep = 0;
1251#endif
1252
1253	lck_rw_ilk_lock(lck);
1254
1255	lck->lck_rw_shared_cnt--;
1256
1257	if (lck->lck_rw_want_upgrade) {
1258		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
1259			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, 0, 0);
1260
1261		/*
1262		 *	Someone else has requested upgrade.
1263		 *	Since we've released a read lock, wake
1264		 *	him up.
1265		 */
1266		if (lck->lck_rw_waiting && (lck->lck_rw_shared_cnt == 0)) {
1267			lck->lck_rw_waiting = FALSE;
1268			do_wakeup = TRUE;
1269		}
1270
1271		lck_rw_ilk_unlock(lck);
1272
1273		if (do_wakeup)
1274			thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
1275
1276		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
1277			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, 0, 0);
1278
1279		return (FALSE);
1280	}
1281
1282	lck->lck_rw_want_upgrade = TRUE;
1283
1284	while (lck->lck_rw_shared_cnt != 0) {
1285		i = lock_wait_time[1];
1286
1287		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
1288			     (int)lck, lck->lck_rw_shared_cnt, i, 0, 0);
1289
1290#if	CONFIG_DTRACE
1291		readers_at_sleep = lck->lck_rw_shared_cnt;
1292		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK]) && wait_interval == 0) {
1293			wait_interval = mach_absolute_time();
1294		} else {
1295			wait_interval = (unsigned) -1;
1296		}
1297#endif
1298		if (i != 0) {
1299			lck_rw_ilk_unlock(lck);
1300			while (--i != 0 && lck->lck_rw_shared_cnt != 0)
1301				continue;
1302			lck_rw_ilk_lock(lck);
1303		}
1304
1305		if (lck->lck_rw_shared_cnt != 0) {
1306			lck->lck_rw_waiting = TRUE;
1307			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1308			if (res == THREAD_WAITING) {
1309				lck_rw_ilk_unlock(lck);
1310				res = thread_block(THREAD_CONTINUE_NULL);
1311#if	CONFIG_DTRACE
1312				slept = 1;
1313#endif
1314				lck_rw_ilk_lock(lck);
1315			}
1316		}
1317		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
1318			     (int)lck, lck->lck_rw_shared_cnt, 0, 0, 0);
1319	}
1320
1321	lck_rw_ilk_unlock(lck);
1322
1323#if	CONFIG_DTRACE
1324	/*
1325	 * We infer if we took a sleep or spin path by whether readers_at_sleep
1326	 * was set.
1327	 */
1328	if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) {
1329		if (slept == 0) {
1330			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1331		} else {
1332			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1333			    mach_absolute_time() - wait_interval, 1,
1334			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1335		}
1336	}
1337#endif
1338
1339	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1340	return (TRUE);
1341}
1342
1343/*
1344 *      Routine:        lck_rw_lock_exclusive_to_shared_gen
1345 */
1346void
1347lck_rw_lock_exclusive_to_shared_gen(
1348	lck_rw_t	*lck)
1349{
1350	boolean_t	   do_wakeup = FALSE;
1351
1352	lck_rw_ilk_lock(lck);
1353
1354	lck->lck_rw_shared_cnt++;
1355	if (lck->lck_rw_want_upgrade)
1356		lck->lck_rw_want_upgrade = FALSE;
1357	else
1358	 	lck->lck_rw_want_excl = FALSE;
1359
1360	if (lck->lck_rw_waiting) {
1361		lck->lck_rw_waiting = FALSE;
1362		do_wakeup = TRUE;
1363	}
1364
1365	lck_rw_ilk_unlock(lck);
1366
1367	if (do_wakeup)
1368		thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
1369
1370	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1371}
1372
1373
1374/*
1375 *	Routine:	lck_rw_try_lock_exclusive_gen
1376 *	Function:
1377 *		Tries to get a write lock.
1378 *
1379 *		Returns FALSE if the lock is not held on return.
1380 */
1381
1382boolean_t
1383lck_rw_try_lock_exclusive_gen(
1384	lck_rw_t	*lck)
1385{
1386	lck_rw_ilk_lock(lck);
1387
1388	if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade || lck->lck_rw_shared_cnt) {
1389		/*
1390		 *	Can't get lock.
1391		 */
1392		lck_rw_ilk_unlock(lck);
1393		return(FALSE);
1394	}
1395
1396	/*
1397	 *	Have lock.
1398	 */
1399
1400	lck->lck_rw_want_excl = TRUE;
1401
1402	lck_rw_ilk_unlock(lck);
1403
1404	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
1405	return(TRUE);
1406}
1407
1408/*
1409 *	Routine:	lck_rw_try_lock_shared_gen
1410 *	Function:
1411 *		Tries to get a read lock.
1412 *
1413 *		Returns FALSE if the lock is not held on return.
1414 */
1415
1416boolean_t
1417lck_rw_try_lock_shared_gen(
1418	lck_rw_t	*lck)
1419{
1420	lck_rw_ilk_lock(lck);
1421
1422	if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1423	    ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) {
1424		lck_rw_ilk_unlock(lck);
1425		return(FALSE);
1426	}
1427
1428	lck->lck_rw_shared_cnt++;
1429
1430	lck_rw_ilk_unlock(lck);
1431
1432	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
1433	return(TRUE);
1434}
1435
1436
1437/*
1438 *	Routine:	lck_rw_ext_backtrace
1439 */
1440void
1441lck_rw_ext_backtrace(
1442	lck_rw_ext_t	*lck)
1443{
1444	unsigned int *stackptr, *stackptr_prev;
1445	unsigned int frame;
1446
1447	__asm__ volatile("mr %0,r1" : "=r" (stackptr));
1448	frame = 0;
1449	while (frame < LCK_FRAMES_MAX) {
1450		stackptr_prev = stackptr;
1451		stackptr = ( unsigned int *)*stackptr;
1452		if ( (((unsigned int)stackptr_prev) ^ ((unsigned int)stackptr)) > 8192)
1453			break;
1454		lck->lck_rw_deb.stack[frame] = *(stackptr+2);
1455		frame++;
1456	}
1457	while (frame < LCK_FRAMES_MAX) {
1458		lck->lck_rw_deb.stack[frame] = 0;
1459		frame++;
1460	}
1461}
1462
1463
1464/*
1465 *      Routine:        lck_rw_lock_exclusive_ext
1466 */
1467void
1468lck_rw_lock_exclusive_ext(
1469	lck_rw_ext_t	*lck,
1470	lck_rw_t	*rlck)
1471{
1472	int				i;
1473	wait_result_t	res;
1474	boolean_t		lock_miss = FALSE;
1475	boolean_t		lock_wait = FALSE;
1476	boolean_t		lock_stat;
1477#if	CONFIG_DTRACE
1478	uint64_t wait_interval = 0;
1479	int slept = 0;
1480	int readers_at_sleep;
1481#endif
1482
1483	lck_rw_check_type(lck, rlck);
1484
1485	if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_MYLOCK)) == LCK_RW_ATTR_DEBUG)
1486	     && (lck->lck_rw_deb.thread == current_thread()))
1487		panic("rw lock (%p) recursive lock attempt\n", rlck);
1488
1489	lck_rw_ilk_lock(&lck->lck_rw);
1490#if	CONFIG_DTRACE
1491	readers_at_sleep = lck->lck_rw.lck_rw_shared_cnt;
1492#endif
1493
1494	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
1495
1496	if (lock_stat)
1497		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
1498
1499	/*
1500	 *	Try to acquire the lck_rw.lck_rw_want_excl bit.
1501	 */
1502	while (lck->lck_rw.lck_rw_want_excl) {
1503		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)rlck, 0, 0, 0, 0);
1504
1505		if (lock_stat && !lock_miss) {
1506			lock_miss = TRUE;
1507			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
1508		}
1509#if	CONFIG_DTRACE
1510		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
1511			wait_interval = mach_absolute_time();
1512		} else {
1513			wait_interval = (unsigned) -1;
1514		}
1515#endif
1516
1517		i = lock_wait_time[1];
1518		if (i != 0) {
1519			lck_rw_ilk_unlock(&lck->lck_rw);
1520			while (--i != 0 && lck->lck_rw.lck_rw_want_excl)
1521				continue;
1522			lck_rw_ilk_lock(&lck->lck_rw);
1523		}
1524
1525		if (lck->lck_rw.lck_rw_want_excl) {
1526			lck->lck_rw.lck_rw_waiting = TRUE;
1527			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1528			if (res == THREAD_WAITING) {
1529				if (lock_stat && !lock_wait) {
1530					lock_wait = TRUE;
1531					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
1532				}
1533				lck_rw_ilk_unlock(&lck->lck_rw);
1534				res = thread_block(THREAD_CONTINUE_NULL);
1535#if	CONFIG_DTRACE
1536				slept = 1;
1537#endif
1538				lck_rw_ilk_lock(&lck->lck_rw);
1539			}
1540		}
1541		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)rlck, res, 0, 0, 0);
1542	}
1543	lck->lck_rw.lck_rw_want_excl = TRUE;
1544
1545	/* Wait for readers (and upgrades) to finish */
1546
1547	while ((lck->lck_rw.lck_rw_shared_cnt != 0) || lck->lck_rw.lck_rw_want_upgrade) {
1548		i = lock_wait_time[1];
1549
1550		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
1551			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, i, 0);
1552#if	CONFIG_DTRACE
1553		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
1554			wait_interval = mach_absolute_time();
1555		} else {
1556			wait_interval = (unsigned) -1;
1557		}
1558#endif
1559
1560		if (lock_stat && !lock_miss) {
1561			lock_miss = TRUE;
1562			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
1563		}
1564
1565		if (i != 0) {
1566			lck_rw_ilk_unlock(&lck->lck_rw);
1567			while (--i != 0 && (lck->lck_rw.lck_rw_shared_cnt != 0 ||
1568					    lck->lck_rw.lck_rw_want_upgrade))
1569				continue;
1570			lck_rw_ilk_lock(&lck->lck_rw);
1571		}
1572
1573		if (lck->lck_rw.lck_rw_shared_cnt != 0 || lck->lck_rw.lck_rw_want_upgrade) {
1574			lck->lck_rw.lck_rw_waiting = TRUE;
1575			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1576			if (res == THREAD_WAITING) {
1577				if (lock_stat && !lock_wait) {
1578					lock_wait = TRUE;
1579					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
1580				}
1581				lck_rw_ilk_unlock(&lck->lck_rw);
1582				res = thread_block(THREAD_CONTINUE_NULL);
1583#if	CONFIG_DTRACE
1584				slept = 1;
1585#endif
1586				lck_rw_ilk_lock(&lck->lck_rw);
1587			}
1588		}
1589		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
1590			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, res, 0);
1591	}
1592
1593	lck->lck_rw_deb.pc_excl = __builtin_return_address(0);
1594	if (LcksOpts & enaLkExtStck)
1595		lck_rw_ext_backtrace(lck);
1596	lck->lck_rw_deb.thread = current_thread();
1597
1598	lck_rw_ilk_unlock(&lck->lck_rw);
1599#if	CONFIG_DTRACE
1600	/*
1601	 * Decide what latencies we suffered that are Dtrace events.
1602	 * If we have set wait_interval, then we either spun or slept.
1603	 * At least we get out from under the interlock before we record
1604	 * which is the best we can do here to minimize the impact
1605	 * of the tracing.
1606	 */
1607	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1608		if (slept == 0) {
1609			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1610			    mach_absolute_time() - wait_interval, 1);
1611		} else {
1612			/*
1613			 * For the blocking case, we also record if when we blocked
1614			 * it was held for read or write, and how many readers.
1615			 * Notice that above we recorded this before we dropped
1616			 * the interlock so the count is accurate.
1617			 */
1618			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1619			    mach_absolute_time() - wait_interval, 1,
1620			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1621		}
1622	}
1623	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1624#endif
1625}
1626
1627
1628/*
1629 *      Routine:        lck_rw_done_ext
1630 */
1631lck_rw_type_t
1632lck_rw_done_ext(
1633	lck_rw_ext_t	*lck,
1634	lck_rw_t	*rlck)
1635{
1636	boolean_t	do_wakeup = FALSE;
1637	lck_rw_type_t	lck_rw_type;
1638
1639
1640	lck_rw_check_type(lck, rlck);
1641
1642	lck_rw_ilk_lock(&lck->lck_rw);
1643
1644	if (lck->lck_rw.lck_rw_shared_cnt != 0) {
1645		lck_rw_type = LCK_RW_TYPE_SHARED;
1646		lck->lck_rw.lck_rw_shared_cnt--;
1647	}
1648	else {
1649		lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1650		if (lck->lck_rw.lck_rw_want_upgrade)
1651			lck->lck_rw.lck_rw_want_upgrade = FALSE;
1652		else if (lck->lck_rw.lck_rw_want_excl)
1653			lck->lck_rw.lck_rw_want_excl = FALSE;
1654		else
1655			panic("rw lock (%p) bad state (0x%08X) on attempt to release a shared or exlusive right\n",
1656				  rlck, lck->lck_rw.lck_rw_tag);
1657		if (lck->lck_rw_deb.thread == THREAD_NULL)
1658			panic("rw lock (%p) not held\n",
1659			      rlck);
1660		else if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD)) == LCK_RW_ATTR_DEBUG)
1661			 && (lck->lck_rw_deb.thread != current_thread()))
1662			panic("rw lock (%p) unlocked by non-owner(%p), current owner(%p)\n",
1663				  rlck, current_thread(), lck->lck_rw_deb.thread);
1664		lck->lck_rw_deb.thread = THREAD_NULL;
1665	}
1666
1667	if (lck->lck_rw_attr & LCK_RW_ATTR_DEBUG)
1668		lck->lck_rw_deb.pc_done = __builtin_return_address(0);
1669
1670	/*
1671	 *	There is no reason to wakeup a waiting thread
1672	 *	if the read-count is non-zero.  Consider:
1673	 *		we must be dropping a read lock
1674	 *		threads are waiting only if one wants a write lock
1675	 *		if there are still readers, they can't proceed
1676	 */
1677
1678	if (lck->lck_rw.lck_rw_waiting && (lck->lck_rw.lck_rw_shared_cnt == 0)) {
1679		lck->lck_rw.lck_rw_waiting = FALSE;
1680		do_wakeup = TRUE;
1681	}
1682
1683	lck_rw_ilk_unlock(&lck->lck_rw);
1684
1685	if (do_wakeup)
1686		thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
1687	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lck_rw_type);
1688	return(lck_rw_type);
1689}
1690
1691
1692/*
1693 *	Routine:	lck_rw_lock_shared_ext
1694 */
1695void
1696lck_rw_lock_shared_ext(
1697	lck_rw_ext_t	*lck,
1698	lck_rw_t	*rlck)
1699{
1700	int				i;
1701	wait_result_t	res;
1702	boolean_t		lock_miss = FALSE;
1703	boolean_t		lock_wait = FALSE;
1704	boolean_t		lock_stat;
1705#if	CONFIG_DTRACE
1706	uint64_t wait_interval = 0;
1707	int slept = 0;
1708	int readers_at_sleep;
1709#endif
1710
1711	lck_rw_check_type(lck, rlck);
1712
1713	lck_rw_ilk_lock(&lck->lck_rw);
1714#if	CONFIG_DTRACE
1715	readers_at_sleep = lck->lck_rw.lck_rw_shared_cnt;
1716#endif
1717
1718	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
1719
1720	if (lock_stat)
1721		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
1722
1723	while ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) &&
1724	       ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) {
1725		i = lock_wait_time[1];
1726
1727		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
1728			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, i, 0);
1729#if	CONFIG_DTRACE
1730		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
1731			wait_interval = mach_absolute_time();
1732		} else {
1733			wait_interval = (unsigned) -1;
1734		}
1735#endif
1736
1737		if (lock_stat && !lock_miss) {
1738			lock_miss = TRUE;
1739			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
1740		}
1741
1742		if (i != 0) {
1743			lck_rw_ilk_unlock(&lck->lck_rw);
1744			while (--i != 0 &&
1745			       (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) &&
1746	       		       ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl)))
1747				continue;
1748			lck_rw_ilk_lock(&lck->lck_rw);
1749		}
1750
1751		if ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade)  &&
1752		   ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) {
1753			lck->lck_rw.lck_rw_waiting = TRUE;
1754			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1755			if (res == THREAD_WAITING) {
1756				if (lock_stat && !lock_wait) {
1757					lock_wait = TRUE;
1758					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
1759				}
1760				lck_rw_ilk_unlock(&lck->lck_rw);
1761				res = thread_block(THREAD_CONTINUE_NULL);
1762#if	CONFIG_DTRACE
1763				slept = 1;
1764#endif
1765				lck_rw_ilk_lock(&lck->lck_rw);
1766			}
1767		}
1768		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
1769			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, res, 0);
1770	}
1771
1772	lck->lck_rw.lck_rw_shared_cnt++;
1773
1774	lck_rw_ilk_unlock(&lck->lck_rw);
1775#if	CONFIG_DTRACE
1776	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1777		if (slept == 0) {
1778			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1779		} else {
1780			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1781			    mach_absolute_time() - wait_interval, 0,
1782			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1783		}
1784	}
1785	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1786#endif
1787}
1788
1789
1790/*
1791 *	Routine:	lck_rw_lock_shared_to_exclusive_ext
1792 *	Function:
1793 *		Improves a read-only lock to one with
1794 *		write permission.  If another reader has
1795 *		already requested an upgrade to a write lock,
1796 *		no lock is held upon return.
1797 *
1798 *		Returns FALSE if the upgrade *failed*.
1799 */
1800
1801boolean_t
1802lck_rw_lock_shared_to_exclusive_ext(
1803	lck_rw_ext_t	*lck,
1804	lck_rw_t	*rlck)
1805{
1806	int	    i;
1807	boolean_t	    do_wakeup = FALSE;
1808	wait_result_t      res;
1809	boolean_t		lock_miss = FALSE;
1810	boolean_t		lock_wait = FALSE;
1811	boolean_t		lock_stat;
1812#if	CONFIG_DTRACE
1813	uint64_t wait_interval = 0;
1814	int slept = 0;
1815#endif
1816
1817	lck_rw_check_type(lck, rlck);
1818
1819	if (lck->lck_rw_deb.thread == current_thread())
1820		panic("rw lock (%p) recursive lock attempt\n", rlck);
1821
1822	lck_rw_ilk_lock(&lck->lck_rw);
1823
1824	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
1825
1826	if (lock_stat)
1827		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
1828
1829	lck->lck_rw.lck_rw_shared_cnt--;
1830
1831	if (lck->lck_rw.lck_rw_want_upgrade) {
1832		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
1833			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, 0, 0);
1834
1835		/*
1836		 *	Someone else has requested upgrade.
1837		 *	Since we've released a read lock, wake
1838		 *	him up.
1839		 */
1840		if (lck->lck_rw.lck_rw_waiting && (lck->lck_rw.lck_rw_shared_cnt == 0)) {
1841			lck->lck_rw.lck_rw_waiting = FALSE;
1842			do_wakeup = TRUE;
1843		}
1844
1845		lck_rw_ilk_unlock(&lck->lck_rw);
1846
1847		if (do_wakeup)
1848			thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
1849
1850		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
1851			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, 0, 0);
1852
1853		return (FALSE);
1854	}
1855
1856	lck->lck_rw.lck_rw_want_upgrade = TRUE;
1857
1858	while (lck->lck_rw.lck_rw_shared_cnt != 0) {
1859		i = lock_wait_time[1];
1860
1861		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
1862			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, i, 0, 0);
1863
1864		if (lock_stat && !lock_miss) {
1865			lock_miss = TRUE;
1866			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
1867		}
1868#if	CONFIG_DTRACE
1869		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK]) && wait_interval == 0) {
1870			wait_interval = mach_absolute_time();
1871		} else {
1872			wait_interval = (unsigned) -1;
1873		}
1874#endif
1875
1876		if (i != 0) {
1877			lck_rw_ilk_unlock(&lck->lck_rw);
1878			while (--i != 0 && lck->lck_rw.lck_rw_shared_cnt != 0)
1879				continue;
1880			lck_rw_ilk_lock(&lck->lck_rw);
1881		}
1882
1883		if (lck->lck_rw.lck_rw_shared_cnt != 0) {
1884			lck->lck_rw.lck_rw_waiting = TRUE;
1885			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
1886			if (res == THREAD_WAITING) {
1887				if (lock_stat && !lock_wait) {
1888					lock_wait = TRUE;
1889					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
1890				}
1891				lck_rw_ilk_unlock(&lck->lck_rw);
1892				res = thread_block(THREAD_CONTINUE_NULL);
1893#if	CONFIG_DTRACE
1894				slept = 1;
1895#endif
1896				lck_rw_ilk_lock(&lck->lck_rw);
1897			}
1898		}
1899		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
1900			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, 0, 0, 0);
1901	}
1902
1903	lck->lck_rw_deb.pc_excl = __builtin_return_address(0);
1904	if (LcksOpts & enaLkExtStck)
1905		lck_rw_ext_backtrace(lck);
1906	lck->lck_rw_deb.thread = current_thread();
1907
1908	lck_rw_ilk_unlock(&lck->lck_rw);
1909
1910#if	CONFIG_DTRACE
1911	/*
1912	 * If we've travelled a path with no spin or sleep, then wait_interval
1913	 * is still zero.
1914	 */
1915	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1916		if (slept == 0) {
1917			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1918		} else {
1919			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck, mach_absolute_time() - wait_interval, 0);
1920		}
1921	}
1922#endif
1923
1924	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1925
1926	return (TRUE);
1927}
1928
1929/*
1930 *      Routine:        lck_rw_lock_exclusive_to_shared_ext
1931 */
1932void
1933lck_rw_lock_exclusive_to_shared_ext(
1934	lck_rw_ext_t	*lck,
1935	lck_rw_t	*rlck)
1936{
1937	boolean_t	   do_wakeup = FALSE;
1938
1939	lck_rw_check_type(lck, rlck);
1940
1941	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1942			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, 0, 0);
1943
1944	lck_rw_ilk_lock(&lck->lck_rw);
1945
1946	lck->lck_rw.lck_rw_shared_cnt++;
1947	if (lck->lck_rw.lck_rw_want_upgrade)
1948		lck->lck_rw.lck_rw_want_upgrade = FALSE;
1949	else if (lck->lck_rw.lck_rw_want_excl)
1950	 	lck->lck_rw.lck_rw_want_excl = FALSE;
1951	else
1952		panic("rw lock (%p) bad state (0x%08X) on attempt to release a shared or exlusive right\n",
1953			  rlck, lck->lck_rw.lck_rw_tag);
1954	if (lck->lck_rw_deb.thread == THREAD_NULL)
1955		panic("rw lock (%p) not held\n",
1956		      rlck);
1957	else if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD)) == LCK_RW_ATTR_DEBUG)
1958		  && (lck->lck_rw_deb.thread != current_thread()))
1959		panic("rw lock (%p) unlocked by non-owner(%p), current owner(%p)\n",
1960			  rlck, current_thread(), lck->lck_rw_deb.thread);
1961
1962	lck->lck_rw_deb.thread = THREAD_NULL;
1963
1964	if (lck->lck_rw.lck_rw_waiting) {
1965		lck->lck_rw.lck_rw_waiting = FALSE;
1966		do_wakeup = TRUE;
1967	}
1968
1969	lck_rw_ilk_unlock(&lck->lck_rw);
1970
1971	if (do_wakeup)
1972		thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
1973
1974	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1975			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, lck->lck_rw.lck_rw_shared_cnt, 0);
1976
1977	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1978}
1979
1980
1981/*
1982 *	Routine:	lck_rw_try_lock_exclusive_ext
1983 *	Function:
1984 *		Tries to get a write lock.
1985 *
1986 *		Returns FALSE if the lock is not held on return.
1987 */
1988
1989boolean_t
1990lck_rw_try_lock_exclusive_ext(
1991	lck_rw_ext_t	*lck,
1992	lck_rw_t	*rlck)
1993{
1994	boolean_t		lock_stat;
1995
1996	lck_rw_check_type(lck, rlck);
1997
1998	lck_rw_ilk_lock(&lck->lck_rw);
1999
2000	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
2001
2002	if (lock_stat)
2003		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
2004
2005	if (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade || lck->lck_rw.lck_rw_shared_cnt) {
2006		/*
2007		 *	Can't get lock.
2008		 */
2009		if (lock_stat) {
2010			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
2011		}
2012		lck_rw_ilk_unlock(&lck->lck_rw);
2013		return(FALSE);
2014	}
2015
2016	/*
2017	 *	Have lock.
2018	 */
2019
2020	lck->lck_rw.lck_rw_want_excl = TRUE;
2021	lck->lck_rw_deb.pc_excl = __builtin_return_address(0);
2022	if (LcksOpts & enaLkExtStck)
2023		lck_rw_ext_backtrace(lck);
2024	lck->lck_rw_deb.thread = current_thread();
2025
2026	lck_rw_ilk_unlock(&lck->lck_rw);
2027
2028	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
2029
2030	return(TRUE);
2031}
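
/*
 * Usage sketch (illustrative only): a non-blocking attempt at exclusive
 * ownership through the KPI wrapper.  "my_lck" is a hypothetical lck_rw_t;
 * the fallback work shown is the caller's choice, not part of this file.
 *
 *	if (lck_rw_try_lock_exclusive(&my_lck)) {
 *		... update the protected state ...
 *		lck_rw_done(&my_lck);
 *	} else {
 *		... defer or retry without sleeping on the lock ...
 *	}
 */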
2032
2033/*
2034 *	Routine:	lck_rw_try_lock_shared_ext
2035 *	Function:
2036 *		Tries to get a read lock.
2037 *
 *		Returns FALSE, without waiting, if the read lock cannot be
 *		acquired immediately; the lock is not held on return in that case.
2039 */
2040
2041boolean_t
2042lck_rw_try_lock_shared_ext(
2043	lck_rw_ext_t	*lck,
2044	lck_rw_t	*rlck)
2045{
2046	boolean_t		lock_stat;
2047
2048	lck_rw_check_type(lck, rlck);
2049
2050	lck_rw_ilk_lock(&lck->lck_rw);
2051
2052	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
2053
2054	if (lock_stat)
2055		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
2056
2057	if ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) &&
2058	    ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) {
2059		if (lock_stat) {
2060			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
2061		}
2062		lck_rw_ilk_unlock(&lck->lck_rw);
2063		return(FALSE);
2064	}
2065
2066	lck->lck_rw.lck_rw_shared_cnt++;
2067
2068	lck_rw_ilk_unlock(&lck->lck_rw);
2069
2070	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
2071
2072	return(TRUE);
2073}
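
/*
 * Usage sketch (illustrative only): a non-blocking attempt at a read hold
 * through the KPI wrapper.  "my_lck" is a hypothetical lck_rw_t.
 *
 *	if (lck_rw_try_lock_shared(&my_lck)) {
 *		... read the protected state ...
 *		lck_rw_done(&my_lck);
 *	}
 */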
2074
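/*
 *	Routine:	lck_rw_check_type
 *	Function:
 *		Panic if the extended lock is not tagged as a rw lock.
 */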
2075void
2076lck_rw_check_type(
2077	lck_rw_ext_t	*lck,
2078	lck_rw_t		*rlck)
2079{
2080	if (lck->lck_rw_deb.type != RW_TAG)
2081		panic("rw lock (%p) not a rw lock type (0x%08X)\n",rlck, lck->lck_rw_deb.type);
2082}
2083
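/*
 *	Routine:	lck_rw_assert_ext
 *	Function:
 *		Panic unless the extended rw lock is held in the mode
 *		requested by the caller.
 */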
2084void
2085lck_rw_assert_ext(
2086	lck_rw_ext_t	*lck,
2087	lck_rw_t	*rlck,
2088	unsigned int	type)
2089{
2090	lck_rw_check_type(lck, rlck);
2091
2092	switch (type) {
2093	case LCK_RW_ASSERT_SHARED:
2094		if (lck->lck_rw.lck_rw_shared_cnt != 0) {
2095			return;
2096		}
2097		break;
2098	case LCK_RW_ASSERT_EXCLUSIVE:
2099		if ((lck->lck_rw.lck_rw_want_excl ||
2100		     lck->lck_rw.lck_rw_want_upgrade) &&
2101		    lck->lck_rw.lck_rw_shared_cnt == 0) {
2102			return;
2103		}
2104		break;
2105	case LCK_RW_ASSERT_HELD:
2106		if (lck->lck_rw.lck_rw_want_excl ||
2107		    lck->lck_rw.lck_rw_want_upgrade ||
2108		    lck->lck_rw.lck_rw_shared_cnt != 0) {
2109			return;
2110		}
2111		break;
2112	default:
2113		break;
2114	}
2115
2116	panic("rw lock (%p -> %p) not held (mode=%u)\n", rlck, lck, type);
2117}
2118
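/*
 *	Routine:	lck_rw_assert
 *	Function:
 *		Panic unless the rw lock is held in the requested mode;
 *		indirect (debug) locks are handed to lck_rw_assert_ext.
 */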
2119void
2120lck_rw_assert(
2121	lck_rw_t	*lck,
2122	unsigned int	type)
2123{
2124	if (lck->lck_rw_tag != LCK_RW_TAG_INDIRECT) {
2125		switch (type) {
2126		case LCK_RW_ASSERT_SHARED:
2127			if (lck->lck_rw_shared_cnt != 0) {
2128				return;
2129			}
2130			break;
2131		case LCK_RW_ASSERT_EXCLUSIVE:
2132			if (lck->lck_rw_shared_cnt == 0 &&
2133			    (lck->lck_rw_want_excl ||
2134			     lck->lck_rw_want_upgrade)) {
2135				return;
2136			}
2137			break;
2138		case LCK_RW_ASSERT_HELD:
2139			if (lck->lck_rw_shared_cnt != 0 ||
2140			    lck->lck_rw_want_excl ||
2141			    lck->lck_rw_want_upgrade) {
2142				return;
2143			}
2144			break;
2145		default:
2146			break;
2147		}
2148		panic("rw lock (%p) not held (mode=%u)\n", lck, type);
2149	} else {
2150		lck_rw_assert_ext((lck_rw_ext_t *)lck->lck_rw_ptr,
2151				  (lck_rw_t *)lck,
2152				  type);
2153	}
2154}
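
/*
 * Usage sketch (illustrative only): asserting lock ownership at the top of
 * a routine that requires its caller to hold the lock.  "my_lck" and
 * "my_obj_update" are hypothetical.
 *
 *	void
 *	my_obj_update(void)
 *	{
 *		lck_rw_assert(&my_lck, LCK_RW_ASSERT_EXCLUSIVE);
 *		... modify state protected by my_lck ...
 *	}
 */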
2155
2156/*
2157 * The C portion of the mutex package.  These routines are only invoked
2158 * if the optimized assembler routines can't do the work.
2159 */
2160
2161/*
 * Forward declaration
2163 */
2164
2165void lck_mtx_ext_init(
2166	lck_mtx_ext_t	*lck,
2167	lck_grp_t	*grp,
2168	lck_attr_t	*attr);
2169
2170/*
2171 *	Routine:	mutex_alloc
2172 *	Function:
2173 *		Allocate a mutex for external users who cannot
2174 *		hard-code the structure definition into their
2175 *		objects.
2176 *		For now just use kalloc, but a zone is probably
2177 *		warranted.
2178 */
2179mutex_t *
2180mutex_alloc(
2181	unsigned short	tag)
2182{
2183	mutex_t		*m;
2184
2185	if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0)
2186	  mutex_init(m, tag);
2187	return(m);
2188}
2189
2190/*
2191 *	Routine:	mutex_free
2192 */
2193void
2194mutex_free(
2195	mutex_t	*m)
2196{
2197	kfree((void *)m, sizeof(mutex_t));
2198}
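
/*
 * Usage sketch (illustrative only): the allocate/init/free cycle for the
 * legacy mutex interface.  The locking calls shown are the caller's choice;
 * a tag value of 0 is assumed to be acceptable here.
 *
 *	mutex_t *m = mutex_alloc(0);
 *	if (m != NULL) {
 *		mutex_lock(m);
 *		... critical section ...
 *		mutex_unlock(m);
 *		mutex_free(m);
 *	}
 */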
2199
2200/*
2201 *      Routine:        lck_mtx_alloc_init
2202 */
2203lck_mtx_t *
2204lck_mtx_alloc_init(
2205	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
2207	lck_mtx_t	*lck;
2208
2209	if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
2210		lck_mtx_init(lck, grp, attr);
2211
2212	return(lck);
2213}
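
/*
 * Usage sketch (illustrative only): the full lck_mtx allocation cycle using
 * the group/attribute KPI.  The group name string is arbitrary.
 *
 *	lck_grp_t	*grp = lck_grp_alloc_init("my grp", LCK_GRP_ATTR_NULL);
 *	lck_mtx_t	*mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(mtx);
 *	... critical section ...
 *	lck_mtx_unlock(mtx);
 *
 *	lck_mtx_free(mtx, grp);		releases the lock's group reference
 *	lck_grp_free(grp);		drops the group itself
 */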
2214
2215/*
2216 *      Routine:        lck_mtx_free
2217 */
2218void
2219lck_mtx_free(
2220	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
2222	lck_mtx_destroy(lck, grp);
2223	kfree((void *)lck, sizeof(lck_mtx_t));
2224}
2225
2226/*
2227 *      Routine:        lck_mtx_init
2228 */
2229void
2230lck_mtx_init(
2231	lck_mtx_t	*lck,
2232	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
2234	lck_mtx_ext_t	*lck_ext;
2235	lck_attr_t	*lck_attr;
2236
2237	if (attr != LCK_ATTR_NULL)
2238		lck_attr = attr;
2239	else
2240		lck_attr = &LockDefaultLckAttr;
2241
2242	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2243		if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2244			lck_mtx_ext_init(lck_ext, grp, lck_attr);
2245			lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2246			lck->lck_mtx_ptr = lck_ext;
2247		}
2248	} else {
2249		lck->lck_mtx_data = 0;
2250		lck->lck_mtx_waiters = 0;
2251		lck->lck_mtx_pri = 0;
2252	}
2253	lck_grp_reference(grp);
2254	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2255}
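
/*
 * Usage sketch (illustrative only): initializing a mutex embedded in a
 * caller-owned structure.  "struct my_obj", "obj" and "my_grp" are
 * hypothetical.
 *
 *	struct my_obj {
 *		lck_mtx_t	mo_lock;
 *		int		mo_state;
 *	};
 *
 *	lck_mtx_init(&obj->mo_lock, my_grp, LCK_ATTR_NULL);
 *	lck_mtx_lock(&obj->mo_lock);
 *	obj->mo_state++;
 *	lck_mtx_unlock(&obj->mo_lock);
 */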
2256
2257/*
2258 *      Routine:        lck_mtx_init_ext
2259 */
2260void
2261lck_mtx_init_ext(
2262	lck_mtx_t	*lck,
2263	lck_mtx_ext_t	*lck_ext,
2264	lck_grp_t	*grp,
2265	lck_attr_t	*attr)
2266{
2267	lck_attr_t	*lck_attr;
2268
2269	if (attr != LCK_ATTR_NULL)
2270		lck_attr = attr;
2271	else
2272		lck_attr = &LockDefaultLckAttr;
2273
2274	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2275		lck_mtx_ext_init(lck_ext, grp, lck_attr);
2276		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2277		lck->lck_mtx_ptr = lck_ext;
2278	} else {
2279		lck->lck_mtx_data = 0;
2280		lck->lck_mtx_waiters = 0;
2281		lck->lck_mtx_pri = 0;
2282	}
2283	lck_grp_reference(grp);
2284	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2285}
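
/*
 * Usage sketch (illustrative only): supplying caller-owned lck_mtx_ext_t
 * storage so that a debug-attributed mutex does not require the kalloc()
 * performed by lck_mtx_init().  "my_mtx", "my_mtx_ext", "my_grp" and
 * "my_dbg_attr" are hypothetical.
 *
 *	static lck_mtx_t	my_mtx;
 *	static lck_mtx_ext_t	my_mtx_ext;
 *
 *	lck_mtx_init_ext(&my_mtx, &my_mtx_ext, my_grp, my_dbg_attr);
 */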
2286
2287/*
2288 *      Routine:        lck_mtx_ext_init
2289 */
2290void
2291lck_mtx_ext_init(
2292	lck_mtx_ext_t	*lck,
2293	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
2295
2296	bzero((void *)lck, sizeof(lck_mtx_ext_t));
2297
2298	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2299		lck->lck_mtx_deb.type = MUTEX_TAG;
2300		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2301	}
2302
2303	lck->lck_mtx_grp = grp;
2304
2305	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
2306		 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2307}
2308
2309/*
2310 *      Routine:        lck_mtx_destroy
2311 */
2312void
2313lck_mtx_destroy(
2314	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
2316	boolean_t lck_is_indirect;
2317
2318	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2319		return;
2320	lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
2321	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2322	if (lck_is_indirect)
2323		kfree((void *)lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
2324
2325	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2326	lck_grp_deallocate(grp);
2327	return;
2328}
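
/*
 * Usage sketch (illustrative only): tearing down an embedded mutex.  The
 * lock must not be held; the group reference and lock count taken at init
 * time are released by lck_mtx_destroy() itself.
 *
 *	lck_mtx_destroy(&obj->mo_lock, my_grp);
 */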
2329
2330
2331#if	MACH_KDB
2332/*
2333 * Routines to print out simple_locks and mutexes in a nicely-formatted
2334 * fashion.
2335 */
2336
2337const char *simple_lock_labels = "ENTRY    ILK THREAD   DURATION CALLER";
2338const char *mutex_labels = "ENTRY    LOCKED WAITERS   THREAD CALLER";
2339
2340void	db_print_simple_lock(
2341			simple_lock_t	addr);
2342
2343void	db_print_mutex(
2344			mutex_t		* addr);
2345
2346void
2347db_show_one_simple_lock (db_expr_t addr, boolean_t have_addr,
2348			 __unused db_expr_t count,
2349			 __unused char *modif)
2350{
2351	simple_lock_t	saddr = (simple_lock_t)(unsigned long)addr;
2352
2353	if (saddr == (simple_lock_t)0 || !have_addr) {
2354		db_error ("No simple_lock\n");
2355	}
2356#if	USLOCK_DEBUG
2357	else if (saddr->lock_type != USLOCK_TAG)
2358		db_error ("Not a simple_lock\n");
2359#endif	/* USLOCK_DEBUG */
2360
2361	db_printf ("%s\n", simple_lock_labels);
2362	db_print_simple_lock (saddr);
2363}
2364
2365void
2366db_print_simple_lock (
2367	simple_lock_t	addr)
2368{
2369
2370	db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
2371#if	USLOCK_DEBUG
2372	db_printf (" %08x", addr->debug.lock_thread);
2373	db_printf (" %08x ", addr->debug.duration[1]);
2374	db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
2375#endif	/* USLOCK_DEBUG */
2376	db_printf ("\n");
2377}
2378
2379void
2380db_show_one_mutex (db_expr_t addr, boolean_t have_addr,
2381		   __unused db_expr_t count,
2382		   __unused char *modif)
2383{
2384	mutex_t		* maddr = (mutex_t *)(unsigned long)addr;
2385
2386	if (maddr == (mutex_t *)0 || !have_addr)
2387		db_error ("No mutex\n");
2388#if	MACH_LDEBUG
2389	else if (maddr->lck_mtx_deb.type != MUTEX_TAG)
2390		db_error ("Not a mutex\n");
2391#endif	/* MACH_LDEBUG */
2392
2393	db_printf ("%s\n", mutex_labels);
2394	db_print_mutex (maddr);
2395}
2396
2397void
2398db_print_mutex (
2399	mutex_t		* addr)
2400{
2401	db_printf ("%08x %6d %7d",
2402		   addr, *addr, addr->lck_mtx.lck_mtx_waiters);
2403#if	MACH_LDEBUG
2404	db_printf (" %08x ", addr->lck_mtx_deb.thread);
2405	db_printsym (addr->lck_mtx_deb.stack[0], DB_STGY_ANY);
2406#endif	/* MACH_LDEBUG */
2407	db_printf ("\n");
2408}
2409
2410void
2411db_show_one_lock(
2412	lock_t  *lock)
2413{
2414	db_printf("shared_count = 0x%x, %swant_upgrade, %swant_exclusive, ",
2415		  lock->lck_rw.lck_rw_shared_cnt,
2416		  lock->lck_rw.lck_rw_want_upgrade ? "" : "!",
2417		  lock->lck_rw.lck_rw_want_excl ? "" : "!");
2418	db_printf("%swaiting\n",
2419		  lock->lck_rw.lck_rw_waiting ? "" : "!");
2420	db_printf("%sInterlock\n",
2421		  lock->lck_rw.lck_rw_interlock ? "" : "!");
2422}
2423
2424#endif	/* MACH_KDB */
2425
2426