rf_stripelocks.c revision 1.30
1/*	$NetBSD: rf_stripelocks.c,v 1.30 2009/03/15 17:17:23 cegger Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Authors: Mark Holland, Jim Zelenka
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/*
30 * stripelocks.c -- code to lock stripes for read and write access
31 *
32 * The code distinguishes between read locks and write locks. There can be
33 * as many readers to given stripe as desired. When a write request comes
34 * in, no further readers are allowed to enter, and all subsequent requests
35 * are queued in FIFO order. When a the number of readers goes to zero, the
36 * writer is given the lock. When a writer releases the lock, the list of
37 * queued requests is scanned, and all readersq up to the next writer are
38 * given the lock.
39 *
40 * The lock table size must be one less than a power of two, but HASH_STRIPEID
41 * is the only function that requires this.
42 *
43 * The code now supports "range locks". When you ask to lock a stripe, you
44 * specify a range of addresses in that stripe that you want to lock. When
45 * you acquire the lock, you've locked only this range of addresses, and
46 * other threads can concurrently read/write any non-overlapping portions
47 * of the stripe. The "addresses" that you lock are abstract in that you
48 * can pass in anything you like.  The expectation is that you'll pass in
49 * the range of physical disk offsets of the parity bits you're planning
50 * to update. The idea behind this, of course, is to allow sub-stripe
51 * locking. The implementation is perhaps not the best imaginable; in the
52 * worst case a lock release is O(n^2) in the total number of outstanding
53 * requests to a given stripe.  Note that if you're striping with a
54 * stripe unit size equal to an entire disk (i.e. not striping), there will
55 * be only one stripe and you may spend some significant number of cycles
56 * searching through stripe lock descriptors.
57 */
58
59#include <sys/cdefs.h>
60__KERNEL_RCSID(0, "$NetBSD: rf_stripelocks.c,v 1.30 2009/03/15 17:17:23 cegger Exp $");
61
62#include <dev/raidframe/raidframevar.h>
63
64#include "rf_raid.h"
65#include "rf_stripelocks.h"
66#include "rf_alloclist.h"
67#include "rf_debugprint.h"
68#include "rf_general.h"
69#include "rf_driver.h"
70#include "rf_shutdown.h"
71
72#ifdef DEBUG
73
74#define Dprintf1(s,a)         rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
75#define Dprintf2(s,a,b)       rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
76#define Dprintf3(s,a,b,c)     rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
77#define Dprintf4(s,a,b,c,d)   rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL)
78#define Dprintf5(s,a,b,c,d,e) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL)
79#define Dprintf6(s,a,b,c,d,e,f) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL)
80#define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL)
81#define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void *)((unsigned long)h))
82
83#else /* DEBUG */
84
85#define Dprintf1(s,a) {}
86#define Dprintf2(s,a,b) {}
87#define Dprintf3(s,a,b,c) {}
88#define Dprintf4(s,a,b,c,d) {}
89#define Dprintf5(s,a,b,c,d,e) {}
90#define Dprintf6(s,a,b,c,d,e,f) {}
91#define Dprintf7(s,a,b,c,d,e,f,g) {}
92#define Dprintf8(s,a,b,c,d,e,f,g,h) {}
93
94#endif /* DEBUG */
95
96#define FLUSH
97
98#define HASH_STRIPEID(_sid_)  ( (_sid_) & (rf_lockTableSize-1) )
99
100static void AddToWaitersQueue(RF_StripeLockDesc_t * lockDesc,
101			      RF_LockReqDesc_t * lockReqDesc);
102static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID);
103static void FreeStripeLockDesc(RF_StripeLockDesc_t * p);
104static RF_LockTableEntry_t *rf_MakeLockTable(void);
105#if RF_DEBUG_STRIPELOCK
106static void PrintLockedStripes(RF_LockTableEntry_t * lockTable);
107#endif
108
109/* determines if two ranges overlap.  always yields false if either
110   start value is negative */
111#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2)              \
112        ( (_strt1 >= 0) && (_strt2 >= 0) &&                               \
113          (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)) )
114
115/* determines if any of the ranges specified in the two lock
116   descriptors overlap each other */
117
118#define RANGE_OVERLAP(_cand, _pred)                                       \
119  ( SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop,                  \
120                         (_pred)->start,  (_pred)->stop ) ||              \
121    SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2,                 \
122                         (_pred)->start,  (_pred)->stop ) ||              \
123    SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop,                  \
124                         (_pred)->start2, (_pred)->stop2) ||              \
125    SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2,                 \
126                         (_pred)->start2, (_pred)->stop2) )
127
128/* Determines if a candidate lock request conflicts with a predecessor
129 * lock req.  Note that the arguments are not interchangeable.
130 *
131 * The rules are:
132 *
133 *      a candidate read conflicts with a predecessor write if any
134 *      ranges overlap
135 *
136 *      a candidate write conflicts with a predecessor read if any
137 *      ranges overlap
138 *
139 *      a candidate write conflicts with a predecessor write if any
140 *      ranges overlap */
141
142#define STRIPELOCK_CONFLICT(_cand, _pred)                                 \
143        RANGE_OVERLAP((_cand), (_pred)) &&                                \
144        ( ( (((_cand)->type == RF_IO_TYPE_READ) &&                        \
145             ((_pred)->type == RF_IO_TYPE_WRITE)) ||                      \
146            (((_cand)->type == RF_IO_TYPE_WRITE) &&                       \
147             ((_pred)->type == RF_IO_TYPE_READ)) ||                       \
148            (((_cand)->type == RF_IO_TYPE_WRITE) &&                       \
149             ((_pred)->type == RF_IO_TYPE_WRITE))                         \
150          )                                                               \
151        )
152
153#define RF_MAX_FREE_STRIPELOCK 128
154#define RF_MIN_FREE_STRIPELOCK  32
155
156static void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable);
157static void rf_ShutdownStripeLockFreeList(void *);
158static void rf_RaidShutdownStripeLocks(void *);
159
160static void
161rf_ShutdownStripeLockFreeList(void *ignored)
162{
163	pool_destroy(&rf_pools.stripelock);
164}
165
166int
167rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp)
168{
169	unsigned mask;
170
171	rf_pool_init(&rf_pools.stripelock, sizeof(RF_StripeLockDesc_t),
172		     "rf_stripelock_pl", RF_MIN_FREE_STRIPELOCK, RF_MAX_FREE_STRIPELOCK);
173	rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL);
174
175	for (mask = 0x1; mask; mask <<= 1)
176		if (rf_lockTableSize == mask)
177			break;
178	if (!mask) {
179		printf("[WARNING:  lock table size must be a power of two.  Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE);
180		rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE;
181	}
182	return (0);
183}
184
185static RF_LockTableEntry_t *
186rf_MakeLockTable(void)
187{
188	RF_LockTableEntry_t *lockTable;
189	int     i;
190
191	RF_Malloc(lockTable,
192		  ((int) rf_lockTableSize) * sizeof(RF_LockTableEntry_t),
193		  (RF_LockTableEntry_t *));
194	if (lockTable == NULL)
195		return (NULL);
196	for (i = 0; i < rf_lockTableSize; i++) {
197		rf_mutex_init(&lockTable[i].mutex);
198	}
199	return (lockTable);
200}
201
202static void
203rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable)
204{
205
206#if RF_DEBUG_STRIPELOCK
207	if (rf_stripeLockDebug) {
208		PrintLockedStripes(lockTable);
209	}
210#endif
211	RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t));
212}
213
214static void
215rf_RaidShutdownStripeLocks(void *arg)
216{
217	RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
218	rf_ShutdownStripeLocks(raidPtr->lockTable);
219}
220
221int
222rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
223			RF_Config_t *cfgPtr)
224{
225
226	raidPtr->lockTable = rf_MakeLockTable();
227	if (raidPtr->lockTable == NULL)
228		return (ENOMEM);
229	rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr);
230
231	return (0);
232}
233/* returns 0 if you've got the lock, and non-zero if you have to wait.
234 * if and only if you have to wait, we'll cause cbFunc to get invoked
235 * with cbArg when you are granted the lock.  We store a tag in
236 * *releaseTag that you need to give back to us when you release the
237 * lock.  */
238int
239rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID,
240		     RF_LockReqDesc_t *lockReqDesc)
241{
242	RF_StripeLockDesc_t *lockDesc;
243	RF_StripeLockDesc_t *newlockDesc;
244	RF_LockReqDesc_t *p;
245#if defined(DEBUG) && (RF_DEBUG_STRIPELOCK > 0)
246	int     tid = 0;
247#endif
248	int     hashval = HASH_STRIPEID(stripeID);
249	int     retcode = 0;
250
251	RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type));
252
253#if RF_DEBUG_STRIPELOCK
254	if (rf_stripeLockDebug) {
255		if (stripeID == -1) {
256			Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n", tid);
257		} else {
258			Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n",
259			    tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start,
260			    lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2);
261			Dprintf3("[%d] lock %ld hashval %d\n", tid, stripeID, hashval);
262			FLUSH;
263		}
264	}
265#endif
266	if (stripeID == -1)
267		return (0);
268	lockReqDesc->next = NULL;	/* just to be sure */
269	newlockDesc = AllocStripeLockDesc(stripeID);
270
271	RF_LOCK_MUTEX(lockTable[hashval].mutex);
272	for (lockDesc = lockTable[hashval].descList; lockDesc;
273	     lockDesc = lockDesc->next) {
274		if (lockDesc->stripeID == stripeID)
275			break;
276	}
277
278	if (!lockDesc) {
279		/* no entry in table => no one reading or writing */
280		lockDesc = newlockDesc;
281		lockDesc->next = lockTable[hashval].descList;
282		lockTable[hashval].descList = lockDesc;
283		if (lockReqDesc->type == RF_IO_TYPE_WRITE)
284			lockDesc->nWriters++;
285		lockDesc->granted = lockReqDesc;
286#if RF_DEBUG_STRIPELOCK
287		if (rf_stripeLockDebug) {
288			Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n",
289			    tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2);
290			FLUSH;
291		}
292#endif
293	} else {
294		/* we won't be needing newlockDesc after all.. pity.. */
295		FreeStripeLockDesc(newlockDesc);
296
297		if (lockReqDesc->type == RF_IO_TYPE_WRITE)
298			lockDesc->nWriters++;
299
300		if (lockDesc->nWriters == 0) {
301			/* no need to search any lists if there are no
302			 * writers anywhere */
303			lockReqDesc->next = lockDesc->granted;
304			lockDesc->granted = lockReqDesc;
305#if RF_DEBUG_STRIPELOCK
306			if (rf_stripeLockDebug) {
307				Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n",
308				    tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2);
309				FLUSH;
310			}
311#endif
312		} else {
313
314			/* search the granted & waiting lists for a
315			 * conflict.  stop searching as soon as we
316			 * find one */
317			retcode = 0;
318			for (p = lockDesc->granted; p; p = p->next)
319				if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {
320					retcode = 1;
321					break;
322				}
323			if (!retcode)
324				for (p = lockDesc->waitersH; p; p = p->next)
325					if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {
326						retcode = 2;
327						break;
328					}
329			if (!retcode) {
330				/* no conflicts found => grant lock */
331				lockReqDesc->next = lockDesc->granted;
332				lockDesc->granted = lockReqDesc;
333#if RF_DEBUG_STRIPELOCK
334				if (rf_stripeLockDebug) {
335					Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n",
336					    tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop,
337					    lockReqDesc->start2, lockReqDesc->stop2);
338					FLUSH;
339				}
340#endif
341			} else {
342#if RF_DEBUG_STRIPELOCK
343				if (rf_stripeLockDebug) {
344					Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n",
345					    tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop,
346					    hashval);
347					Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode);
348					FLUSH;
349				}
350#endif
351				AddToWaitersQueue(lockDesc, lockReqDesc);
352				/* conflict => the current access must wait */
353			}
354		}
355	}
356
357	RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
358	return (retcode);
359}
360
361void
362rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID,
363		     RF_LockReqDesc_t *lockReqDesc)
364{
365	RF_StripeLockDesc_t *lockDesc, *ld_t;
366	RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t;
367#if defined(DEBUG) && (RF_DEBUG_STRIPELOCK > 0)
368	int     tid = 0;
369#endif
370	int     hashval = HASH_STRIPEID(stripeID);
371	int     release_it, consider_it;
372	RF_LockReqDesc_t *candidate, *candidate_t, *predecessor;
373
374	RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type));
375
376#if RF_DEBUG_STRIPELOCK
377	if (rf_stripeLockDebug) {
378		if (stripeID == -1) {
379			Dprintf1("[%d] Lock release supressed (stripeID == -1)\n", tid);
380		} else {
381			Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
382			    tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable);
383			FLUSH;
384		}
385	}
386#endif
387	if (stripeID == -1)
388		return;
389
390	RF_LOCK_MUTEX(lockTable[hashval].mutex);
391
392	/* find the stripe lock descriptor */
393	for (ld_t = NULL, lockDesc = lockTable[hashval].descList;
394	     lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) {
395		if (lockDesc->stripeID == stripeID)
396			break;
397	}
398	RF_ASSERT(lockDesc);	/* major error to release a lock that doesn't
399				 * exist */
400
401	/* find the stripe lock request descriptor & delete it from the list */
402	for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next)
403		if (lr == lockReqDesc)
404			break;
405
406	RF_ASSERT(lr && (lr == lockReqDesc));	/* major error to release a
407						 * lock that hasn't been
408						 * granted */
409	if (lr_t)
410		lr_t->next = lr->next;
411	else {
412		RF_ASSERT(lr == lockDesc->granted);
413		lockDesc->granted = lr->next;
414	}
415	lr->next = NULL;
416
417	if (lockReqDesc->type == RF_IO_TYPE_WRITE)
418		lockDesc->nWriters--;
419
420	/* search through the waiters list to see if anyone needs to
421	 * be woken up. for each such descriptor in the wait list, we
422	 * check it against everything granted and against everything
423	 * _in front_ of it in the waiters queue.  If it conflicts
424	 * with none of these, we release it.
425	 *
426	 * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED
427	 * LIST HERE.
428	 *
429         * This will roach the case where the callback tries to
430         * acquire a new lock in the same stripe.  There are some
431         * asserts to try and detect this.
432	 *
433	 * We apply 2 performance optimizations: (1) if releasing this
434	 * lock results in no more writers to this stripe, we just
435	 * release everybody waiting, since we place no restrictions
436	 * on the number of concurrent reads. (2) we consider as
437	 * candidates for wakeup only those waiters that have a range
438	 * overlap with either the descriptor being woken up or with
439	 * something in the callbacklist (i.e.  something we've just
440	 * now woken up). This allows us to avoid the long evaluation
441	 * for some descriptors. */
442
443	callbacklist = NULL;
444	if (lockDesc->nWriters == 0) {	/* performance tweak (1) */
445		while (lockDesc->waitersH) {
446			/* delete from waiters list */
447			lr = lockDesc->waitersH;
448			lockDesc->waitersH = lr->next;
449
450			RF_ASSERT(lr->type == RF_IO_TYPE_READ);
451
452			/* add to granted list */
453			lr->next = lockDesc->granted;
454			lockDesc->granted = lr;
455
456			RF_ASSERT(!lr->templink);
457			/* put on callback list so that we'll invoke
458                           callback below */
459			lr->templink = callbacklist;
460			callbacklist = lr;
461#if RF_DEBUG_STRIPELOCK
462			if (rf_stripeLockDebug) {
463				Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
464				    tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable);
465				FLUSH;
466			}
467#endif
468		}
469		lockDesc->waitersT = NULL;
470		/* we've purged the whole waiters list */
471
472	} else
473		for (candidate_t = NULL, candidate = lockDesc->waitersH;
474		     candidate;) {
475
476			/* performance tweak (2) */
477			consider_it = 0;
478			if (RANGE_OVERLAP(lockReqDesc, candidate))
479				consider_it = 1;
480			else
481				for (t = callbacklist; t; t = t->templink)
482					if (RANGE_OVERLAP(t, candidate)) {
483						consider_it = 1;
484						break;
485					}
486			if (!consider_it) {
487#if RF_DEBUG_STRIPELOCK
488				if (rf_stripeLockDebug) {
489					Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
490					    tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
491					    (unsigned long) lockTable);
492					FLUSH;
493				}
494#endif
495				candidate_t = candidate;
496				candidate = candidate->next;
497				continue;
498			}
499			/* we have a candidate for release.  check to
500			 * make sure it is not blocked by any granted
501			 * locks */
502			release_it = 1;
503			for (predecessor = lockDesc->granted; predecessor;
504			     predecessor = predecessor->next) {
505				if (STRIPELOCK_CONFLICT(candidate,
506							predecessor)) {
507#if RF_DEBUG_STRIPELOCK
508					if (rf_stripeLockDebug) {
509						Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
510						    tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
511						    (unsigned long) lockTable);
512						FLUSH;
513					}
514#endif
515					release_it = 0;
516					break;
517				}
518			}
519
520			/* now check to see if the candidate is
521			 * blocked by any waiters that occur before it
522			 * it the wait queue */
523			if (release_it)
524				for (predecessor = lockDesc->waitersH;
525				     predecessor != candidate;
526				     predecessor = predecessor->next) {
527					if (STRIPELOCK_CONFLICT(candidate,
528								predecessor)) {
529#if RF_DEBUG_STRIPELOCK
530						if (rf_stripeLockDebug) {
531							Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
532							    tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
533							    (unsigned long) lockTable);
534							FLUSH;
535						}
536#endif
537						release_it = 0;
538						break;
539					}
540				}
541
542			/* release it if indicated */
543			if (release_it) {
544#if RF_DEBUG_STRIPELOCK
545				if (rf_stripeLockDebug) {
546					Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
547					    tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
548					    (unsigned long) lockTable);
549					FLUSH;
550				}
551#endif
552				if (candidate_t) {
553					candidate_t->next = candidate->next;
554					if (lockDesc->waitersT == candidate)
555						lockDesc->waitersT = candidate_t;	/* cannot be waitersH since candidate_t is not NULL */
556				} else {
557					RF_ASSERT(candidate == lockDesc->waitersH);
558					lockDesc->waitersH = lockDesc->waitersH->next;
559					if (!lockDesc->waitersH)
560						lockDesc->waitersT = NULL;
561				}
562				/* move it to the granted list */
563				candidate->next = lockDesc->granted;
564				lockDesc->granted = candidate;
565
566				RF_ASSERT(!candidate->templink);
567				/* put it on the list of things to be
568                                   called after we release the mutex */
569				candidate->templink = callbacklist;
570
571				callbacklist = candidate;
572
573				if (!candidate_t)
574					candidate = lockDesc->waitersH;
575				else
576					candidate = candidate_t->next;
577				/* continue with the rest of the list */
578			} else {
579				candidate_t = candidate;
580				/* continue with the rest of the list */
581				candidate = candidate->next;
582			}
583		}
584
585	/* delete the descriptor if no one is waiting or active */
586	if (!lockDesc->granted && !lockDesc->waitersH) {
587		RF_ASSERT(lockDesc->nWriters == 0);
588#if RF_DEBUG_STRIPELOCK
589		if (rf_stripeLockDebug) {
590			Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n", tid, (unsigned long) lockTable, stripeID);
591			FLUSH;
592		}
593#endif
594		if (ld_t)
595			ld_t->next = lockDesc->next;
596		else {
597			RF_ASSERT(lockDesc == lockTable[hashval].descList);
598			lockTable[hashval].descList = lockDesc->next;
599		}
600		FreeStripeLockDesc(lockDesc);
601		lockDesc = NULL;/* only for the ASSERT below */
602	}
603	RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
604
605	/* now that we've unlocked the mutex, invoke the callback on
606	 * all the descriptors in the list */
607
608	/* if we deleted the descriptor, we should have no callbacks
609         * to do */
610	RF_ASSERT(!((callbacklist) && (!lockDesc)));
611	for (candidate = callbacklist; candidate;) {
612		t = candidate;
613		candidate = candidate->templink;
614		t->templink = NULL;
615		(t->cbFunc) (t->cbArg);
616	}
617}
618/* must have the indicated lock table mutex upon entry */
619static void
620AddToWaitersQueue(RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc)
621{
622	if (!lockDesc->waitersH) {
623		lockDesc->waitersH = lockDesc->waitersT = lockReqDesc;
624	} else {
625		lockDesc->waitersT->next = lockReqDesc;
626		lockDesc->waitersT = lockReqDesc;
627	}
628}
629
630static RF_StripeLockDesc_t *
631AllocStripeLockDesc(RF_StripeNum_t stripeID)
632{
633	RF_StripeLockDesc_t *p;
634
635	p = pool_get(&rf_pools.stripelock, PR_WAITOK);
636	if (p) {
637		p->stripeID = stripeID;
638		p->granted = NULL;
639		p->waitersH = NULL;
640		p->waitersT = NULL;
641		p->nWriters = 0;
642		p->next = NULL;
643	}
644	return (p);
645}
646
647static void
648FreeStripeLockDesc(RF_StripeLockDesc_t *p)
649{
650	pool_put(&rf_pools.stripelock, p);
651}
652
653#if RF_DEBUG_STRIPELOCK
654static void
655PrintLockedStripes(RF_LockTableEntry_t *lockTable)
656{
657	int     i, j, foundone = 0, did;
658	RF_StripeLockDesc_t *p;
659	RF_LockReqDesc_t *q;
660
661	RF_LOCK_MUTEX(rf_printf_mutex);
662	printf("Locked stripes:\n");
663	for (i = 0; i < rf_lockTableSize; i++)
664		if (lockTable[i].descList) {
665			foundone = 1;
666			for (p = lockTable[i].descList; p; p = p->next) {
667				printf("Stripe ID 0x%lx (%d) nWriters %d\n",
668				    (long) p->stripeID, (int) p->stripeID,
669				       p->nWriters);
670
671				if (!(p->granted))
672					printf("Granted: (none)\n");
673				else
674					printf("Granted:\n");
675				for (did = 1, j = 0, q = p->granted; q;
676				     j++, q = q->next) {
677					printf("  %c(%ld-%ld", q->type, (long) q->start, (long) q->stop);
678					if (q->start2 != -1)
679						printf(",%ld-%ld) ", (long) q->start2,
680						    (long) q->stop2);
681					else
682						printf(") ");
683					if (j && !(j % 4)) {
684						printf("\n");
685						did = 1;
686					} else
687						did = 0;
688				}
689				if (!did)
690					printf("\n");
691
692				if (!(p->waitersH))
693					printf("Waiting: (none)\n");
694				else
695					printf("Waiting:\n");
696				for (did = 1, j = 0, q = p->waitersH; q;
697				     j++, q = q->next) {
698					printf("%c(%ld-%ld", q->type, (long) q->start, (long) q->stop);
699					if (q->start2 != -1)
700						printf(",%ld-%ld) ", (long) q->start2, (long) q->stop2);
701					else
702						printf(") ");
703					if (j && !(j % 4)) {
704						printf("\n         ");
705						did = 1;
706					} else
707						did = 0;
708				}
709				if (!did)
710					printf("\n");
711			}
712		}
713	if (!foundone)
714		printf("(none)\n");
715	else
716		printf("\n");
717	RF_UNLOCK_MUTEX(rf_printf_mutex);
718}
719#endif
720