/* subr_turnstile.c revision 123363 */
1123992Ssobomax/*-
2103026Ssobomax * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3103026Ssobomax *
4139823Simp * Redistribution and use in source and binary forms, with or without
5103026Ssobomax * modification, are permitted provided that the following conditions
6103026Ssobomax * are met:
7103026Ssobomax * 1. Redistributions of source code must retain the above copyright
8103026Ssobomax *    notice, this list of conditions and the following disclaimer.
9103026Ssobomax * 2. Redistributions in binary form must reproduce the above copyright
10103026Ssobomax *    notice, this list of conditions and the following disclaimer in the
11148613Sbz *    documentation and/or other materials provided with the distribution.
12148613Sbz * 3. Berkeley Software Design Inc's name may not be used to endorse or
13103026Ssobomax *    promote products derived from this software without specific prior
14103026Ssobomax *    written permission.
15103026Ssobomax *
16103026Ssobomax * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17103026Ssobomax * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18103026Ssobomax * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19103026Ssobomax * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20103026Ssobomax * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21103026Ssobomax * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22103026Ssobomax * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23103026Ssobomax * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24103026Ssobomax * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25103026Ssobomax * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26103026Ssobomax * SUCH DAMAGE.
27103026Ssobomax *
28103026Ssobomax *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29103026Ssobomax *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30103026Ssobomax */
31103026Ssobomax
32103026Ssobomax/*
33103026Ssobomax * Implementation of turnstiles used to hold queue of threads blocked on
34103026Ssobomax * non-sleepable locks.  Sleepable locks use condition variables to
35103026Ssobomax * implement their queues.  Turnstiles differ from a sleep queue in that
36103026Ssobomax * turnstile queue's are assigned to a lock held by an owning thread.  Thus,
37103026Ssobomax * when one thread is enqueued onto a turnstile, it can lend its priority
38103026Ssobomax * to the owning thread.
39103026Ssobomax *
40103026Ssobomax * We wish to avoid bloating locks with an embedded turnstile and we do not
41103026Ssobomax * want to use back-pointers in the locks for the same reason.  Thus, we
42103026Ssobomax * use a similar approach to that of Solaris 7 as described in Solaris
43103026Ssobomax * Internals by Jim Mauro and Richard McDougall.  Turnstiles are looked up
44148613Sbz * in a hash table based on the address of the lock.  Each entry in the
45103026Ssobomax * hash table is a linked-lists of turnstiles and is called a turnstile
46103026Ssobomax * chain.  Each chain contains a spin mutex that protects all of the
47103026Ssobomax * turnstiles in the chain.
48103026Ssobomax *
49103026Ssobomax * Each time a thread is created, a turnstile is malloc'd and attached to
50103026Ssobomax * that thread.  When a thread blocks on a lock, if it is the first thread
51103394Sbde * to block, it lends its turnstile to the lock.  If the lock already has
52103026Ssobomax * a turnstile, then it gives its turnstile to the lock's turnstile's free
53122699Sbms * list.  When a thread is woken up, it takes a turnstile from the free list
54103026Ssobomax * if there are any other waiters.  If it is the only thread blocked on the
55103026Ssobomax * lock, then it reclaims the turnstile associated with the lock and removes
56103026Ssobomax * it from the hash table.
57103026Ssobomax *
58129880Sphk * XXX: We should probably implement some sort of sleep queue that condition
59103026Ssobomax * variables and sleepqueue's share.  On Solaris condition variables are
60164033Srwatson * implemented using a hash table of sleep queues similar to our current
61178888Sjulian * sleep queues.  We might want to investigate doing that ourselves.
62103026Ssobomax */
63103026Ssobomax
64103026Ssobomax#include <sys/cdefs.h>
65103026Ssobomax__FBSDID("$FreeBSD: head/sys/kern/subr_turnstile.c 123363 2003-12-09 21:09:54Z jhb $");
66103344Sbde
67103026Ssobomax#include <sys/param.h>
68103026Ssobomax#include <sys/systm.h>
69103026Ssobomax#include <sys/kernel.h>
70130933Sbrooks#include <sys/ktr.h>
71103026Ssobomax#include <sys/lock.h>
72103026Ssobomax#include <sys/malloc.h>
73196019Srwatson#include <sys/mutex.h>
74103026Ssobomax#include <sys/proc.h>
75103026Ssobomax#include <sys/queue.h>
76103026Ssobomax#include <sys/resourcevar.h>
77103026Ssobomax#include <sys/turnstile.h>
78103026Ssobomax#include <sys/sched.h>
79103026Ssobomax
80103026Ssobomax/*
81103026Ssobomax * Constants for the hash table of turnstile chains.  TC_SHIFT is a magic
82103026Ssobomax * number chosen because the sleep queue's use the same value for the
83103026Ssobomax * shift.  Basically, we ignore the lower 8 bits of the address.
84103026Ssobomax * TC_TABLESIZE must be a power of two for TC_MASK to work properly.
85103026Ssobomax */
86103026Ssobomax#define	TC_TABLESIZE	128			/* Must be power of 2. */
87103026Ssobomax#define	TC_MASK		(TC_TABLESIZE - 1)
88103026Ssobomax#define	TC_SHIFT	8
89103026Ssobomax#define	TC_HASH(lock)	(((uintptr_t)(lock) >> TC_SHIFT) & TC_MASK)
90103026Ssobomax#define	TC_LOOKUP(lock)	&turnstile_chains[TC_HASH(lock)]
91103026Ssobomax
92103026Ssobomax/*
93103026Ssobomax * There are three different lists of turnstiles as follows.  The list
94103026Ssobomax * connected by ts_link entries is a per-thread list of all the turnstiles
95103026Ssobomax * attached to locks that we own.  This is used to fixup our priority when
96103026Ssobomax * a lock is released.  The other two lists use the ts_hash entries.  The
97103026Ssobomax * first of these two is turnstile chain list that a turnstile is on when
98103026Ssobomax * it is attached to a lock.  The second list to use ts_hash is the free
99103026Ssobomax * list hung off a turnstile that is attached to a lock.
100127307Srwatson *
101127307Srwatson * Each turnstile contains two lists of threads.  The ts_blocked list is
102127307Srwatson * a linked list of threads blocked on the turnstile's lock.  The
103127307Srwatson * ts_pending list is a linked list of threads previously awoken by
104127307Srwatson * turnstile_signal() or turnstile_wait() that are waiting to be put on
105103026Ssobomax * the run queue.
106103026Ssobomax *
107103026Ssobomax * Locking key:
108103026Ssobomax *  c - turnstile chain lock
109160195Ssam *  q - td_contested lock
110105300Salfred */
/*
 * A turnstile is the queueing structure lent to a contested lock by the
 * first thread to block on it; see the block comment above for the
 * lending/reclaiming protocol and the locking key for (c)/(q) below.
 */
struct turnstile {
	TAILQ_HEAD(, thread) ts_blocked;	/* (c + q) Blocked threads. */
	TAILQ_HEAD(, thread) ts_pending;	/* (c) Pending threads. */
	LIST_ENTRY(turnstile) ts_hash;		/* (c) Chain and free list. */
	LIST_ENTRY(turnstile) ts_link;		/* (q) Contested locks. */
	LIST_HEAD(, turnstile) ts_free;		/* (c) Free turnstiles. */
	struct lock_object *ts_lockobj;		/* (c) Lock we reference. */
	struct thread *ts_owner;		/* (c + q) Who owns the lock. */
};
120103026Ssobomax
/*
 * One hash bucket: a list of the turnstiles whose locks hash here, plus
 * the spin lock protecting every turnstile on the list.
 */
struct turnstile_chain {
	LIST_HEAD(, turnstile) tc_turnstiles;	/* List of turnstiles. */
	struct mtx tc_lock;			/* Spin lock for this chain. */
};
125152242Sru
126152242Srustatic struct mtx td_contested_lock;
127152242Srustatic struct turnstile_chain turnstile_chains[TC_TABLESIZE];
128154625Sbz
129152242SruMALLOC_DEFINE(M_TURNSTILE, "turnstiles", "turnstiles");
130152242Sru
131152242Sru/*
132152242Sru * Prototypes for non-exported routines.
133103026Ssobomax */
134152242Srustatic void	init_turnstile0(void *dummy);
135152242Srustatic void	propagate_priority(struct thread *);
136152242Srustatic void	turnstile_setowner(struct turnstile *ts, struct thread *owner);
137152242Sru
138152242Sru/*
139154625Sbz * Walks the chain of turnstiles and their owners to propagate the priority
140152242Sru * of the thread being blocked to all the threads holding locks that have to
141152242Sru * release their locks before this thread can run again.
142152242Sru */
static void
propagate_priority(struct thread *td)
{
	struct turnstile_chain *tc;
	struct turnstile *ts;
	struct thread *td1;
	int pri;

	/* Called with sched_lock held; td is the thread about to block. */
	mtx_assert(&sched_lock, MA_OWNED);
	pri = td->td_priority;
	ts = td->td_blocked;
	/*
	 * Walk the chain of lock owners, lending 'pri' to each owner that
	 * has a worse (numerically higher) priority, until we reach an
	 * owner that is running, already has priority <= pri, or is on a
	 * run queue.
	 */
	for (;;) {
		td = ts->ts_owner;

		if (td == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of thread that
			 * next acquires the lock.
			 */
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/*
		 * XXX: The owner of a turnstile can be stale if it is the
		 * first thread to grab a slock of a sx lock.  In that case
		 * it is possible for us to be at SSLEEP or some other
		 * weird state.  We should probably just return if the state
		 * isn't SRUN or SLOCK.
		 */
		KASSERT(!TD_IS_SLEEPING(td),
		    ("sleeping thread (pid %d) owns a non-sleepable lock",
		    td->td_proc->p_pid));

		/*
		 * If this thread already has higher priority than the
		 * thread that is being blocked, we are finished.
		 */
		if (td->td_priority <= pri)
			return;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this shouldn't
		 * ever happen however as it would mean we are in a deadlock.)
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			sched_prio(td, pri);
			return;
		}

		/*
		 * Bump this thread's priority.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a lock, we should be.
		 */
		KASSERT(TD_ON_LOCK(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a lock\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    ts->ts_lockobj->lo_name));

		/*
		 * Pick up the lock that td is blocked on.
		 */
		ts = td->td_blocked;
		MPASS(ts != NULL);
		/* Lock the chain protecting the next turnstile in the walk. */
		tc = TC_LOOKUP(ts->ts_lockobj);
		mtx_lock_spin(&tc->tc_lock);

		/*
		 * This thread may not be blocked on this turnstile anymore
		 * but instead might already be woken up on another CPU
		 * that is waiting on sched_lock in turnstile_unpend() to
		 * finish waking this thread up.  We can detect this case
		 * by checking to see if this thread has been given a
		 * turnstile by either turnstile_signal() or
		 * turnstile_wakeup().  In this case, treat the thread as
		 * if it was already running.
		 */
		if (td->td_turnstile != NULL) {
			mtx_unlock_spin(&tc->tc_lock);
			return;
		}

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.  It doesn't need to be moved
		 * if it is already at the head of the list or if
		 * the item in front of it still has a higher priority.
		 */
		if (td == TAILQ_FIRST(&ts->ts_blocked)) {
			mtx_unlock_spin(&tc->tc_lock);
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
		if (td1->td_priority <= pri) {
			mtx_unlock_spin(&tc->tc_lock);
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		mtx_lock_spin(&td_contested_lock);
		TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
		TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		mtx_unlock_spin(&td_contested_lock);
		CTR4(KTR_LOCK,
		    "propagate_priority: td %p moved before %p on [%p] %s",
		    td, td1, ts->ts_lockobj, ts->ts_lockobj->lo_name);
		mtx_unlock_spin(&tc->tc_lock);
	}
}
291103026Ssobomax
292103026Ssobomax/*
293103026Ssobomax * Early initialization of turnstiles.  This is not done via a SYSINIT()
294103026Ssobomax * since this needs to be initialized very early when mutexes are first
295103026Ssobomax * initialized.
296103026Ssobomax */
297103026Ssobomaxvoid
298103026Ssobomaxinit_turnstiles(void)
299103026Ssobomax{
300103026Ssobomax	int i;
301103026Ssobomax
302103026Ssobomax	for (i = 0; i < TC_TABLESIZE; i++) {
303158416Shsu		LIST_INIT(&turnstile_chains[i].tc_turnstiles);
304103026Ssobomax		mtx_init(&turnstile_chains[i].tc_lock, "turnstile chain",
305103026Ssobomax		    NULL, MTX_SPIN);
306103026Ssobomax	}
307103026Ssobomax	mtx_init(&td_contested_lock, "td_contested", NULL, MTX_SPIN);
308103026Ssobomax	thread0.td_turnstile = NULL;
309103026Ssobomax}
310103026Ssobomax
/*
 * Allocate thread0's turnstile once malloc(9) is usable; runs from
 * SYSINIT at SI_SUB_LOCK since init_turnstiles() is too early to malloc.
 */
static void
init_turnstile0(void *dummy)
{

	thread0.td_turnstile = turnstile_alloc();
}
SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL);
318103026Ssobomax
319103026Ssobomax/*
320103026Ssobomax * Set the owner of the lock this turnstile is attached to.
321103026Ssobomax */
static void
turnstile_setowner(struct turnstile *ts, struct thread *owner)
{

	/* Caller holds td_contested_lock, which protects ts_owner/ts_link. */
	mtx_assert(&td_contested_lock, MA_OWNED);
	MPASS(owner->td_proc->p_magic == P_MAGIC);
	/* A turnstile may only be claimed when it has no owner. */
	MPASS(ts->ts_owner == NULL);
	ts->ts_owner = owner;
	/* Track it on the owner's list so it can recompute its priority. */
	LIST_INSERT_HEAD(&owner->td_contested, ts, ts_link);
}
332151967Sandre
333103026Ssobomax/*
334103026Ssobomax * Malloc a turnstile for a new thread, initialize it and return it.
335103026Ssobomax */
336103026Ssobomaxstruct turnstile *
337103026Ssobomaxturnstile_alloc(void)
338103026Ssobomax{
339103026Ssobomax	struct turnstile *ts;
340103026Ssobomax
341103026Ssobomax	ts = malloc(sizeof(struct turnstile), M_TURNSTILE, M_WAITOK | M_ZERO);
342103026Ssobomax	TAILQ_INIT(&ts->ts_blocked);
343103026Ssobomax	TAILQ_INIT(&ts->ts_pending);
344103026Ssobomax	LIST_INIT(&ts->ts_free);
345103026Ssobomax	return (ts);
346103026Ssobomax}
347103026Ssobomax
348103026Ssobomax/*
349103026Ssobomax * Free a turnstile when a thread is destroyed.
350103026Ssobomax */
void
turnstile_free(struct turnstile *ts)
{

	/* A turnstile may only be freed when no threads are queued on it. */
	MPASS(ts != NULL);
	MPASS(TAILQ_EMPTY(&ts->ts_blocked));
	MPASS(TAILQ_EMPTY(&ts->ts_pending));
	free(ts, M_TURNSTILE);
}
360103026Ssobomax
361103026Ssobomax/*
362103026Ssobomax * Look up the turnstile for a lock in the hash table locking the associated
363103026Ssobomax * turnstile chain along the way.  Return with the turnstile chain locked.
364103026Ssobomax * If no turnstile is found in the hash table, NULL is returned.
365103026Ssobomax */
366103026Ssobomaxstruct turnstile *
367103026Ssobomaxturnstile_lookup(struct lock_object *lock)
368180041Sjulian{
369180041Sjulian	struct turnstile_chain *tc;
370180639Sjulian	struct turnstile *ts;
371180639Sjulian
372180639Sjulian	tc = TC_LOOKUP(lock);
373180639Sjulian	mtx_lock_spin(&tc->tc_lock);
374180639Sjulian	LIST_FOREACH(ts, &tc->tc_turnstiles, ts_hash)
375180639Sjulian		if (ts->ts_lockobj == lock)
376103026Ssobomax			return (ts);
377148613Sbz	return (NULL);
378148613Sbz}
379180041Sjulian
380148613Sbz/*
381148613Sbz * Unlock the turnstile chain associated with a given lock.
382148613Sbz */
383103026Ssobomaxvoid
384103026Ssobomaxturnstile_release(struct lock_object *lock)
385103026Ssobomax{
386103026Ssobomax	struct turnstile_chain *tc;
387103026Ssobomax
388103026Ssobomax	tc = TC_LOOKUP(lock);
389103026Ssobomax	mtx_unlock_spin(&tc->tc_lock);
390103026Ssobomax}
391103026Ssobomax
392103026Ssobomax/*
393103026Ssobomax * Take ownership of a turnstile and adjust the priority of the new
394179894Sthompsa * owner appropriately.
395179894Sthompsa */
396180639Sjulianvoid
397179894Sthompsaturnstile_claim(struct turnstile *ts)
398179894Sthompsa{
399179894Sthompsa	struct turnstile_chain *tc;
400103026Ssobomax	struct thread *td, *owner;
401103026Ssobomax
402103026Ssobomax	tc = TC_LOOKUP(ts->ts_lockobj);
403103026Ssobomax	mtx_assert(&tc->tc_lock, MA_OWNED);
404103026Ssobomax
405103026Ssobomax	owner = curthread;
406103026Ssobomax	mtx_lock_spin(&td_contested_lock);
407128580Sandre	turnstile_setowner(ts, owner);
408103026Ssobomax	mtx_unlock_spin(&td_contested_lock);
409103026Ssobomax
410103026Ssobomax	td = TAILQ_FIRST(&ts->ts_blocked);
411103026Ssobomax	MPASS(td != NULL);
412103026Ssobomax	MPASS(td->td_proc->p_magic == P_MAGIC);
413178888Sjulian	mtx_unlock_spin(&tc->tc_lock);
414178888Sjulian
415103026Ssobomax	/*
416103026Ssobomax	 * Update the priority of the new owner if needed.
417179894Sthompsa	 */
418179894Sthompsa	mtx_lock_spin(&sched_lock);
419180639Sjulian	if (td->td_priority < owner->td_priority)
420103026Ssobomax		owner->td_priority = td->td_priority;
421179894Sthompsa	mtx_unlock_spin(&sched_lock);
422179894Sthompsa}
423179894Sthompsa
424179894Sthompsa/*
425179894Sthompsa * Block the current thread on the turnstile ts.  This function will context
426179894Sthompsa * switch and not return until this thread has been woken back up.  This
427179894Sthompsa * function must be called with the appropriate turnstile chain locked and
428179894Sthompsa * will return with it unlocked.
429103026Ssobomax */
void
turnstile_wait(struct turnstile *ts, struct lock_object *lock,
    struct thread *owner)
{
	struct turnstile_chain *tc;
	struct thread *td, *td1;

	td = curthread;
	tc = TC_LOOKUP(lock);
	/* Caller holds the chain lock; we release it before switching. */
	mtx_assert(&tc->tc_lock, MA_OWNED);
	MPASS(td->td_turnstile != NULL);
	MPASS(owner != NULL);
	MPASS(owner->td_proc->p_magic == P_MAGIC);

	/* If the passed in turnstile is NULL, use this thread's turnstile. */
	if (ts == NULL) {
		/*
		 * We are the first waiter: lend our turnstile to the lock,
		 * hash it onto the chain, and record the lock and owner.
		 */
		ts = td->td_turnstile;
		LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_hash);
		KASSERT(TAILQ_EMPTY(&ts->ts_pending),
		    ("thread's turnstile has pending threads"));
		KASSERT(TAILQ_EMPTY(&ts->ts_blocked),
		    ("thread's turnstile has a non-empty queue"));
		KASSERT(LIST_EMPTY(&ts->ts_free),
		    ("thread's turnstile has a non-empty free list"));
		KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer"));
		ts->ts_lockobj = lock;
		mtx_lock_spin(&td_contested_lock);
		TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
		turnstile_setowner(ts, owner);
		mtx_unlock_spin(&td_contested_lock);
	} else {
		/*
		 * The lock already has a turnstile.  Insert ourselves into
		 * its blocked queue sorted by priority, and donate our own
		 * turnstile to the lock's free list.
		 */
		TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq)
			if (td1->td_priority > td->td_priority)
				break;
		mtx_lock_spin(&td_contested_lock);
		if (td1 != NULL)
			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		else
			TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
		mtx_unlock_spin(&td_contested_lock);
		MPASS(td->td_turnstile != NULL);
		LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash);
		MPASS(owner == ts->ts_owner);
	}
	td->td_turnstile = NULL;
	mtx_unlock_spin(&tc->tc_lock);

	mtx_lock_spin(&sched_lock);
	/*
	 * Handle race condition where a thread on another CPU that owns
	 * lock 'lock' could have woken us in between us dropping the
	 * turnstile chain lock and acquiring the sched_lock.
	 */
	if (td->td_flags & TDF_TSNOBLOCK) {
		td->td_flags &= ~TDF_TSNOBLOCK;
		mtx_unlock_spin(&sched_lock);
		return;
	}

#ifdef notyet
	/*
	 * If we're borrowing an interrupted thread's VM context, we
	 * must clean up before going to sleep.
	 */
	if (td->td_ithd != NULL) {
		struct ithd *it = td->td_ithd;

		if (it->it_interrupted) {
			if (LOCK_LOG_TEST(lock, 0))
				CTR3(KTR_LOCK, "%s: %p interrupted %p",
				    __func__, it, it->it_interrupted);
			intr_thd_fixup(it);
		}
	}
#endif

	/* Save who we are blocked on and switch. */
	td->td_blocked = ts;
	td->td_lockname = lock->lo_name;
	TD_SET_LOCK(td);
	propagate_priority(td);

	if (LOCK_LOG_TEST(lock, 0))
		CTR4(KTR_LOCK, "%s: td %p blocked on [%p] %s", __func__, td,
		    lock, lock->lo_name);

	/* Count a voluntary context switch and block until woken. */
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();

	if (LOCK_LOG_TEST(lock, 0))
		CTR4(KTR_LOCK, "%s: td %p free from blocked on [%p] %s",
		    __func__, td, lock, lock->lo_name);

	mtx_unlock_spin(&sched_lock);
}
525164033Srwatson
526164033Srwatson/*
527164033Srwatson * Pick the highest priority thread on this turnstile and put it on the
528164033Srwatson * pending list.  This must be called with the turnstile chain locked.
529164033Srwatson */
int
turnstile_signal(struct turnstile *ts)
{
	struct turnstile_chain *tc;
	struct thread *td;
	int empty;

	MPASS(ts != NULL);
	MPASS(curthread->td_proc->p_magic == P_MAGIC);
	/* Only the lock owner may signal its turnstile. */
	MPASS(ts->ts_owner == curthread);
	tc = TC_LOOKUP(ts->ts_lockobj);
	mtx_assert(&tc->tc_lock, MA_OWNED);

	/*
	 * Pick the highest priority thread blocked on this lock and
	 * move it to the pending list.
	 */
	td = TAILQ_FIRST(&ts->ts_blocked);
	MPASS(td->td_proc->p_magic == P_MAGIC);
	mtx_lock_spin(&td_contested_lock);
	TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
	mtx_unlock_spin(&td_contested_lock);
	TAILQ_INSERT_TAIL(&ts->ts_pending, td, td_lockq);

	/*
	 * If the turnstile is now empty, remove it from its chain and
	 * give it to the about-to-be-woken thread.  Otherwise take a
	 * turnstile from the free list and give it to the thread.
	 */
	empty = TAILQ_EMPTY(&ts->ts_blocked);
	if (empty)
		MPASS(LIST_EMPTY(&ts->ts_free));
	else
		ts = LIST_FIRST(&ts->ts_free);
	MPASS(ts != NULL);
	LIST_REMOVE(ts, ts_hash);
	td->td_turnstile = ts;

	/* Non-zero return means the woken thread was the last waiter. */
	return (empty);
}
570164033Srwatson
571103026Ssobomax/*
572103026Ssobomax * Put all blocked threads on the pending list.  This must be called with
573103026Ssobomax * the turnstile chain locked.
574103026Ssobomax */
void
turnstile_wakeup(struct turnstile *ts)
{
	struct turnstile_chain *tc;
	struct turnstile *ts1;
	struct thread *td;

	MPASS(ts != NULL);
	MPASS(curthread->td_proc->p_magic == P_MAGIC);
	/* Only the lock owner may wake all of its turnstile's waiters. */
	MPASS(ts->ts_owner == curthread);
	tc = TC_LOOKUP(ts->ts_lockobj);
	mtx_assert(&tc->tc_lock, MA_OWNED);

	/*
	 * Transfer the blocked list to the pending list.
	 */
	mtx_lock_spin(&td_contested_lock);
	TAILQ_CONCAT(&ts->ts_pending, &ts->ts_blocked, td_lockq);
	mtx_unlock_spin(&td_contested_lock);

	/*
	 * Give a turnstile to each thread.  The last thread gets
	 * this turnstile.
	 */
	TAILQ_FOREACH(td, &ts->ts_pending, td_lockq) {
		if (LIST_EMPTY(&ts->ts_free)) {
			/* The free list is exhausted only at the tail. */
			MPASS(TAILQ_NEXT(td, td_lockq) == NULL);
			ts1 = ts;
		} else
			ts1 = LIST_FIRST(&ts->ts_free);
		MPASS(ts1 != NULL);
		LIST_REMOVE(ts1, ts_hash);
		td->td_turnstile = ts1;
	}
}
610103026Ssobomax
611103026Ssobomax/*
612103026Ssobomax * Wakeup all threads on the pending list and adjust the priority of the
613103026Ssobomax * current thread appropriately.  This must be called with the turnstile
614103026Ssobomax * chain locked.
615103026Ssobomax */
void
turnstile_unpend(struct turnstile *ts)
{
	TAILQ_HEAD( ,thread) pending_threads;
	struct turnstile_chain *tc;
	struct thread *td;
	int cp, pri;

	MPASS(ts != NULL);
	/* Only the (outgoing) owner may release the pending threads. */
	MPASS(ts->ts_owner == curthread);
	tc = TC_LOOKUP(ts->ts_lockobj);
	mtx_assert(&tc->tc_lock, MA_OWNED);
	MPASS(!TAILQ_EMPTY(&ts->ts_pending));

	/*
	 * Move the list of pending threads out of the turnstile and
	 * into a local variable.
	 */
	TAILQ_INIT(&pending_threads);
	TAILQ_CONCAT(&pending_threads, &ts->ts_pending, td_lockq);
#ifdef INVARIANTS
	/* Poison the lock pointer so a stale reuse trips the KASSERT. */
	if (TAILQ_EMPTY(&ts->ts_blocked))
		ts->ts_lockobj = NULL;
#endif

	/*
	 * Remove the turnstile from this thread's list of contested locks
	 * since this thread doesn't own it anymore.  New threads will
	 * not be blocking on the turnstile until it is claimed by a new
	 * owner.
	 */
	mtx_lock_spin(&td_contested_lock);
	ts->ts_owner = NULL;
	LIST_REMOVE(ts, ts_link);
	mtx_unlock_spin(&td_contested_lock);
	mtx_unlock_spin(&tc->tc_lock);

	/*
	 * Adjust the priority of curthread based on other contested
	 * locks it owns.  Don't lower the priority below the base
	 * priority however.
	 */
	td = curthread;
	pri = PRI_MAX;
	mtx_lock_spin(&sched_lock);
	mtx_lock_spin(&td_contested_lock);
	LIST_FOREACH(ts, &td->td_contested, ts_link) {
		/* The head of each blocked queue is its best priority. */
		cp = TAILQ_FIRST(&ts->ts_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}
	mtx_unlock_spin(&td_contested_lock);
	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	/*
	 * Wake up all the pending threads.  If a thread is not blocked
	 * on a lock, then it is currently executing on another CPU in
	 * turnstile_wait().  Set a flag to force it to try to acquire
	 * the lock again instead of blocking.
	 */
	while (!TAILQ_EMPTY(&pending_threads)) {
		td = TAILQ_FIRST(&pending_threads);
		TAILQ_REMOVE(&pending_threads, td, td_lockq);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		if (TD_ON_LOCK(td)) {
			td->td_blocked = NULL;
			td->td_lockname = NULL;
			TD_CLR_LOCK(td);
			MPASS(TD_CAN_RUN(td));
			setrunqueue(td);
		} else {
			td->td_flags |= TDF_TSNOBLOCK;
			MPASS(TD_IS_RUNNING(td));
		}
	}
	mtx_unlock_spin(&sched_lock);
}
695155440Sqingli
696103026Ssobomax/*
697103026Ssobomax * Return the first thread in a turnstile.
698164033Srwatson */
struct thread *
turnstile_head(struct turnstile *ts)
{
#ifdef INVARIANTS
	struct turnstile_chain *tc;

	/* Caller must hold the chain lock for the queue to be stable. */
	MPASS(ts != NULL);
	tc = TC_LOOKUP(ts->ts_lockobj);
	mtx_assert(&tc->tc_lock, MA_OWNED);
#endif
	/* Queue is priority sorted, so the head is the best waiter. */
	return (TAILQ_FIRST(&ts->ts_blocked));
}
711103026Ssobomax
712103026Ssobomax/*
713103026Ssobomax * Returns true if a turnstile is empty.
714103026Ssobomax */
int
turnstile_empty(struct turnstile *ts)
{
#ifdef INVARIANTS
	struct turnstile_chain *tc;

	/* Caller must hold the chain lock for the queue to be stable. */
	MPASS(ts != NULL);
	tc = TC_LOOKUP(ts->ts_lockobj);
	mtx_assert(&tc->tc_lock, MA_OWNED);
#endif
	return (TAILQ_EMPTY(&ts->ts_blocked));
}
727103026Ssobomax