rf_paritylog.c revision 1.12
1/*	$NetBSD: rf_paritylog.c,v 1.12 2006/04/26 17:08:48 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/* Code for manipulating in-core parity logs
30 *
31 */
32
33#include <sys/cdefs.h>
34__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.12 2006/04/26 17:08:48 oster Exp $");
35
36#include "rf_archs.h"
37
38#if RF_INCLUDE_PARITYLOGGING > 0
39
40/*
41 * Append-only log for recording parity "update" and "overwrite" records
42 */
43
44#include <dev/raidframe/raidframevar.h>
45
46#include "rf_threadstuff.h"
47#include "rf_mcpair.h"
48#include "rf_raid.h"
49#include "rf_dag.h"
50#include "rf_dagfuncs.h"
51#include "rf_desc.h"
52#include "rf_layout.h"
53#include "rf_diskqueue.h"
54#include "rf_etimer.h"
55#include "rf_paritylog.h"
56#include "rf_general.h"
57#include "rf_map.h"
58#include "rf_paritylogging.h"
59#include "rf_paritylogDiskMgr.h"
60
61static RF_CommonLogData_t *
62AllocParityLogCommonData(RF_Raid_t * raidPtr)
63{
64	RF_CommonLogData_t *common = NULL;
65
66	/* Return a struct for holding common parity log information from the
67	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
68	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
69
70	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
71	if (raidPtr->parityLogDiskQueue.freeCommonList) {
72		common = raidPtr->parityLogDiskQueue.freeCommonList;
73		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
74		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
75	} else {
76		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
77		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
78		rf_mutex_init(&common->mutex);
79	}
80	common->next = NULL;
81	return (common);
82}
83
84static void
85FreeParityLogCommonData(RF_CommonLogData_t * common)
86{
87	RF_Raid_t *raidPtr;
88
89	/* Insert a single struct for holding parity log information (data)
90	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
91	 * NON-BLOCKING */
92
93	raidPtr = common->raidPtr;
94	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
95	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
96	raidPtr->parityLogDiskQueue.freeCommonList = common;
97	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
98}
99
100static RF_ParityLogData_t *
101AllocParityLogData(RF_Raid_t * raidPtr)
102{
103	RF_ParityLogData_t *data = NULL;
104
105	/* Return a struct for holding parity log information from the free
106	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
107	 * call RF_Malloc to create a new structure. NON-BLOCKING */
108
109	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
110	if (raidPtr->parityLogDiskQueue.freeDataList) {
111		data = raidPtr->parityLogDiskQueue.freeDataList;
112		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
113		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
114	} else {
115		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
116		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
117	}
118	data->next = NULL;
119	data->prev = NULL;
120	return (data);
121}
122
123
124static void
125FreeParityLogData(RF_ParityLogData_t * data)
126{
127	RF_ParityLogData_t *nextItem;
128	RF_Raid_t *raidPtr;
129
130	/* Insert a linked list of structs for holding parity log information
131	 * (data) into the free list (parityLogDiskQueue.freeList).
132	 * NON-BLOCKING */
133
134	raidPtr = data->common->raidPtr;
135	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
136	while (data) {
137		nextItem = data->next;
138		data->next = raidPtr->parityLogDiskQueue.freeDataList;
139		raidPtr->parityLogDiskQueue.freeDataList = data;
140		data = nextItem;
141	}
142	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
143}
144
145
146static void
147EnqueueParityLogData(
148    RF_ParityLogData_t * data,
149    RF_ParityLogData_t ** head,
150    RF_ParityLogData_t ** tail)
151{
152	RF_Raid_t *raidPtr;
153
154	/* Insert an in-core parity log (*data) into the head of a disk queue
155	 * (*head, *tail). NON-BLOCKING */
156
157	raidPtr = data->common->raidPtr;
158	if (rf_parityLogDebug)
159		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
160	RF_ASSERT(data->prev == NULL);
161	RF_ASSERT(data->next == NULL);
162	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
163	if (*head) {
164		/* insert into head of queue */
165		RF_ASSERT((*head)->prev == NULL);
166		RF_ASSERT((*tail)->next == NULL);
167		data->next = *head;
168		(*head)->prev = data;
169		*head = data;
170	} else {
171		/* insert into empty list */
172		RF_ASSERT(*head == NULL);
173		RF_ASSERT(*tail == NULL);
174		*head = data;
175		*tail = data;
176	}
177	RF_ASSERT((*head)->prev == NULL);
178	RF_ASSERT((*tail)->next == NULL);
179	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
180}
181
182static RF_ParityLogData_t *
183DequeueParityLogData(
184    RF_Raid_t * raidPtr,
185    RF_ParityLogData_t ** head,
186    RF_ParityLogData_t ** tail,
187    int ignoreLocks)
188{
189	RF_ParityLogData_t *data;
190
191	/* Remove and return an in-core parity log from the tail of a disk
192	 * queue (*head, *tail). NON-BLOCKING */
193
194	/* remove from tail, preserving FIFO order */
195	if (!ignoreLocks)
196		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
197	data = *tail;
198	if (data) {
199		if (*head == *tail) {
200			/* removing last item from queue */
201			*head = NULL;
202			*tail = NULL;
203		} else {
204			*tail = (*tail)->prev;
205			(*tail)->next = NULL;
206			RF_ASSERT((*head)->prev == NULL);
207			RF_ASSERT((*tail)->next == NULL);
208		}
209		data->next = NULL;
210		data->prev = NULL;
211		if (rf_parityLogDebug)
212			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
213	}
214	if (*head) {
215		RF_ASSERT((*head)->prev == NULL);
216		RF_ASSERT((*tail)->next == NULL);
217	}
218	if (!ignoreLocks)
219		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
220	return (data);
221}
222
223
224static void
225RequeueParityLogData(
226    RF_ParityLogData_t * data,
227    RF_ParityLogData_t ** head,
228    RF_ParityLogData_t ** tail)
229{
230	RF_Raid_t *raidPtr;
231
232	/* Insert an in-core parity log (*data) into the tail of a disk queue
233	 * (*head, *tail). NON-BLOCKING */
234
235	raidPtr = data->common->raidPtr;
236	RF_ASSERT(data);
237	if (rf_parityLogDebug)
238		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
239	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
240	if (*tail) {
241		/* append to tail of list */
242		data->prev = *tail;
243		data->next = NULL;
244		(*tail)->next = data;
245		*tail = data;
246	} else {
247		/* inserting into an empty list */
248		*head = data;
249		*tail = data;
250		(*head)->prev = NULL;
251		(*tail)->next = NULL;
252	}
253	RF_ASSERT((*head)->prev == NULL);
254	RF_ASSERT((*tail)->next == NULL);
255	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
256}
257
258RF_ParityLogData_t *
259rf_CreateParityLogData(
260    RF_ParityRecordType_t operation,
261    RF_PhysDiskAddr_t * pda,
262    caddr_t bufPtr,
263    RF_Raid_t * raidPtr,
264    int (*wakeFunc) (RF_DagNode_t * node, int status),
265    void *wakeArg,
266    RF_AccTraceEntry_t * tracerec,
267    RF_Etimer_t startTime)
268{
269	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
270	RF_CommonLogData_t *common;
271	RF_PhysDiskAddr_t *diskAddress;
272	int     boundary, offset = 0;
273
274	/* Return an initialized struct of info to be logged. Build one item
275	 * per physical disk address, one item per region.
276	 *
277	 * NON-BLOCKING */
278
279	diskAddress = pda;
280	common = AllocParityLogCommonData(raidPtr);
281	RF_ASSERT(common);
282
283	common->operation = operation;
284	common->bufPtr = bufPtr;
285	common->raidPtr = raidPtr;
286	common->wakeFunc = wakeFunc;
287	common->wakeArg = wakeArg;
288	common->tracerec = tracerec;
289	common->startTime = startTime;
290	common->cnt = 0;
291
292	if (rf_parityLogDebug)
293		printf("[entering CreateParityLogData]\n");
294	while (diskAddress) {
295		common->cnt++;
296		data = AllocParityLogData(raidPtr);
297		RF_ASSERT(data);
298		data->common = common;
299		data->next = NULL;
300		data->prev = NULL;
301		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
302		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
303			/* disk address does not cross a region boundary */
304			data->diskAddress = *diskAddress;
305			data->bufOffset = offset;
306			offset = offset + diskAddress->numSector;
307			EnqueueParityLogData(data, &resultHead, &resultTail);
308			/* adjust disk address */
309			diskAddress = diskAddress->next;
310		} else {
311			/* disk address crosses a region boundary */
312			/* find address where region is crossed */
313			boundary = 0;
314			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
315				boundary++;
316
317			/* enter data before the boundary */
318			data->diskAddress = *diskAddress;
319			data->diskAddress.numSector = boundary;
320			data->bufOffset = offset;
321			offset += boundary;
322			EnqueueParityLogData(data, &resultHead, &resultTail);
323			/* adjust disk address */
324			diskAddress->startSector += boundary;
325			diskAddress->numSector -= boundary;
326		}
327	}
328	if (rf_parityLogDebug)
329		printf("[leaving CreateParityLogData]\n");
330	return (resultHead);
331}
332
333
334RF_ParityLogData_t *
335rf_SearchAndDequeueParityLogData(
336    RF_Raid_t * raidPtr,
337    int regionID,
338    RF_ParityLogData_t ** head,
339    RF_ParityLogData_t ** tail,
340    int ignoreLocks)
341{
342	RF_ParityLogData_t *w;
343
344	/* Remove and return an in-core parity log from a specified region
345	 * (regionID). If a matching log is not found, return NULL.
346	 *
347	 * NON-BLOCKING. */
348
349	/* walk backward through a list, looking for an entry with a matching
350	 * region ID */
351	if (!ignoreLocks)
352		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
353	w = (*tail);
354	while (w) {
355		if (w->regionID == regionID) {
356			/* remove an element from the list */
357			if (w == *tail) {
358				if (*head == *tail) {
359					/* removing only element in the list */
360					*head = NULL;
361					*tail = NULL;
362				} else {
363					/* removing last item in the list */
364					*tail = (*tail)->prev;
365					(*tail)->next = NULL;
366					RF_ASSERT((*head)->prev == NULL);
367					RF_ASSERT((*tail)->next == NULL);
368				}
369			} else {
370				if (w == *head) {
371					/* removing first item in the list */
372					*head = (*head)->next;
373					(*head)->prev = NULL;
374					RF_ASSERT((*head)->prev == NULL);
375					RF_ASSERT((*tail)->next == NULL);
376				} else {
377					/* removing an item from the middle of
378					 * the list */
379					w->prev->next = w->next;
380					w->next->prev = w->prev;
381					RF_ASSERT((*head)->prev == NULL);
382					RF_ASSERT((*tail)->next == NULL);
383				}
384			}
385			w->prev = NULL;
386			w->next = NULL;
387			if (rf_parityLogDebug)
388				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
389			return (w);
390		} else
391			w = w->prev;
392	}
393	if (!ignoreLocks)
394		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
395	return (NULL);
396}
397
398static RF_ParityLogData_t *
399DequeueMatchingLogData(
400    RF_Raid_t * raidPtr,
401    RF_ParityLogData_t ** head,
402    RF_ParityLogData_t ** tail)
403{
404	RF_ParityLogData_t *logDataList, *logData;
405	int     regionID;
406
407	/* Remove and return an in-core parity log from the tail of a disk
408	 * queue (*head, *tail).  Then remove all matching (identical
409	 * regionIDs) logData and return as a linked list.
410	 *
411	 * NON-BLOCKING */
412
413	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
414	if (logDataList) {
415		regionID = logDataList->regionID;
416		logData = logDataList;
417		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
418		while (logData->next) {
419			logData = logData->next;
420			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
421		}
422	}
423	return (logDataList);
424}
425
426
427static RF_ParityLog_t *
428AcquireParityLog(
429    RF_ParityLogData_t * logData,
430    int finish)
431{
432	RF_ParityLog_t *log = NULL;
433	RF_Raid_t *raidPtr;
434
435	/* Grab a log buffer from the pool and return it. If no buffers are
436	 * available, return NULL. NON-BLOCKING */
437	raidPtr = logData->common->raidPtr;
438	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
439	if (raidPtr->parityLogPool.parityLogs) {
440		log = raidPtr->parityLogPool.parityLogs;
441		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
442		log->regionID = logData->regionID;
443		log->numRecords = 0;
444		log->next = NULL;
445		raidPtr->logsInUse++;
446		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
447	} else {
448		/* no logs available, so place ourselves on the queue of work
449		 * waiting on log buffers this is done while
450		 * parityLogPool.mutex is held, to ensure synchronization with
451		 * ReleaseParityLogs. */
452		if (rf_parityLogDebug)
453			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
454		if (finish)
455			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
456		else
457			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
458	}
459	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
460	return (log);
461}
462
463void
464rf_ReleaseParityLogs(
465    RF_Raid_t * raidPtr,
466    RF_ParityLog_t * firstLog)
467{
468	RF_ParityLogData_t *logDataList;
469	RF_ParityLog_t *log, *lastLog;
470	int     cnt;
471
472	/* Insert a linked list of parity logs (firstLog) to the free list
473	 * (parityLogPool.parityLogPool)
474	 *
475	 * NON-BLOCKING. */
476
477	RF_ASSERT(firstLog);
478
479	/* Before returning logs to global free list, service all requests
480	 * which are blocked on logs.  Holding mutexes for parityLogPool and
481	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
482	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
483	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
484	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
485	log = firstLog;
486	if (firstLog)
487		firstLog = firstLog->next;
488	log->numRecords = 0;
489	log->next = NULL;
490	while (logDataList && log) {
491		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
492		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
493		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
494		if (rf_parityLogDebug)
495			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
496		if (log == NULL) {
497			log = firstLog;
498			if (firstLog) {
499				firstLog = firstLog->next;
500				log->numRecords = 0;
501				log->next = NULL;
502			}
503		}
504		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
505		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
506		if (log)
507			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
508	}
509	/* return remaining logs to pool */
510	if (log) {
511		log->next = firstLog;
512		firstLog = log;
513	}
514	if (firstLog) {
515		lastLog = firstLog;
516		raidPtr->logsInUse--;
517		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
518		while (lastLog->next) {
519			lastLog = lastLog->next;
520			raidPtr->logsInUse--;
521			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
522		}
523		lastLog->next = raidPtr->parityLogPool.parityLogs;
524		raidPtr->parityLogPool.parityLogs = firstLog;
525		cnt = 0;
526		log = raidPtr->parityLogPool.parityLogs;
527		while (log) {
528			cnt++;
529			log = log->next;
530		}
531		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
532	}
533	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
534	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
535}
536
537static void
538ReintLog(
539    RF_Raid_t * raidPtr,
540    int regionID,
541    RF_ParityLog_t * log)
542{
543	RF_ASSERT(log);
544
545	/* Insert an in-core parity log (log) into the disk queue of
546	 * reintegration work.  Set the flag (reintInProgress) for the
547	 * specified region (regionID) to indicate that reintegration is in
548	 * progress for this region. NON-BLOCKING */
549
550	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
551	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
552									 * complete */
553
554	if (rf_parityLogDebug)
555		printf("[requesting reintegration of region %d]\n", log->regionID);
556	/* move record to reintegration queue */
557	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
558	log->next = raidPtr->parityLogDiskQueue.reintQueue;
559	raidPtr->parityLogDiskQueue.reintQueue = log;
560	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
561	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
562	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
563}
564
565static void
566FlushLog(
567    RF_Raid_t * raidPtr,
568    RF_ParityLog_t * log)
569{
570	/* insert a core log (log) into a list of logs
571	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
572	 * NON-BLOCKING */
573
574	RF_ASSERT(log);
575	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
576	RF_ASSERT(log->next == NULL);
577	/* move log to flush queue */
578	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
579	log->next = raidPtr->parityLogDiskQueue.flushQueue;
580	raidPtr->parityLogDiskQueue.flushQueue = log;
581	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
582	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
583}
584
585static int
586DumpParityLogToDisk(
587    int finish,
588    RF_ParityLogData_t * logData)
589{
590	int     i, diskCount, regionID = logData->regionID;
591	RF_ParityLog_t *log;
592	RF_Raid_t *raidPtr;
593
594	raidPtr = logData->common->raidPtr;
595
596	/* Move a core log to disk.  If the log disk is full, initiate
597	 * reintegration.
598	 *
599	 * Return (0) if we can enqueue the dump immediately, otherwise return
600	 * (1) to indicate we are blocked on reintegration and control of the
601	 * thread should be relinquished.
602	 *
603	 * Caller must hold regionInfo[regionID].mutex
604	 *
605	 * NON-BLOCKING */
606
607	if (rf_parityLogDebug)
608		printf("[dumping parity log to disk, region %d]\n", regionID);
609	log = raidPtr->regionInfo[regionID].coreLog;
610	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
611	RF_ASSERT(log->next == NULL);
612
613	/* if reintegration is in progress, must queue work */
614	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
615	if (raidPtr->regionInfo[regionID].reintInProgress) {
616		/* Can not proceed since this region is currently being
617		 * reintegrated. We can not block, so queue remaining work and
618		 * return */
619		if (rf_parityLogDebug)
620			printf("[region %d waiting on reintegration]\n", regionID);
621		/* XXX not sure about the use of finish - shouldn't this
622		 * always be "Enqueue"? */
623		if (finish)
624			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
625		else
626			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
627		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
628		return (1);	/* relenquish control of this thread */
629	}
630	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631	raidPtr->regionInfo[regionID].coreLog = NULL;
632	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
633		/* IMPORTANT!! this loop bound assumes region disk holds an
634		 * integral number of core logs */
635	{
636		/* update disk map for this region */
637		diskCount = raidPtr->regionInfo[regionID].diskCount;
638		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
639			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
640			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
641		}
642		log->diskOffset = diskCount;
643		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
644		FlushLog(raidPtr, log);
645	} else {
646		/* no room for log on disk, send it to disk manager and
647		 * request reintegration */
648		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
649		ReintLog(raidPtr, regionID, log);
650	}
651	if (rf_parityLogDebug)
652		printf("[finished dumping parity log to disk, region %d]\n", regionID);
653	return (0);
654}
655
656int
657rf_ParityLogAppend(
658    RF_ParityLogData_t * logData,
659    int finish,
660    RF_ParityLog_t ** incomingLog,
661    int clearReintFlag)
662{
663	int     regionID, logItem, itemDone;
664	RF_ParityLogData_t *item;
665	int     punt, done = RF_FALSE;
666	RF_ParityLog_t *log;
667	RF_Raid_t *raidPtr;
668	RF_Etimer_t timer;
669	int     (*wakeFunc) (RF_DagNode_t * node, int status);
670	void   *wakeArg;
671
672	/* Add parity to the appropriate log, one sector at a time. This
673	 * routine is called is called by dag functions ParityLogUpdateFunc
674	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
675	 *
676	 * Parity to be logged is contained in a linked-list (logData).  When
677	 * this routine returns, every sector in the list will be in one of
678	 * three places: 1) entered into the parity log 2) queued, waiting on
679	 * reintegration 3) queued, waiting on a core log
680	 *
681	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
682	 * Later, as conditions which required the block are removed, the work
683	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
684	 *
685	 * NON-BLOCKING */
686
687	raidPtr = logData->common->raidPtr;
688	/* lock the region for the first item in logData */
689	RF_ASSERT(logData != NULL);
690	regionID = logData->regionID;
691	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
692	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
693
694	if (clearReintFlag) {
695		/* Enable flushing for this region.  Holding both locks
696		 * provides a synchronization barrier with DumpParityLogToDisk */
697		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
698		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
699		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
700		raidPtr->regionInfo[regionID].diskCount = 0;
701		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
702		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
703										 * enabled */
704		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
705	}
706	/* process each item in logData */
707	while (logData) {
708		/* remove an item from logData */
709		item = logData;
710		logData = logData->next;
711		item->next = NULL;
712		item->prev = NULL;
713
714		if (rf_parityLogDebug)
715			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
716
717		/* see if we moved to a new region */
718		if (regionID != item->regionID) {
719			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
720			regionID = item->regionID;
721			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
722			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
723		}
724		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
725				 * can happen in one of two ways: 1) no core
726				 * log (AcquireParityLog) 2) waiting on
727				 * reintegration (DumpParityLogToDisk) If punt
728				 * is RF_TRUE, the dataItem was queued, so
729				 * skip to next item. */
730
731		/* process item, one sector at a time, until all sectors
732		 * processed or we punt */
733		if (item->diskAddress.numSector > 0)
734			done = RF_FALSE;
735		else
736			RF_ASSERT(0);
737		while (!punt && !done) {
738			/* verify that a core log exists for this region */
739			if (!raidPtr->regionInfo[regionID].coreLog) {
740				/* Attempt to acquire a parity log. If
741				 * acquisition fails, queue remaining work in
742				 * data item and move to nextItem. */
743				if (incomingLog)
744					if (*incomingLog) {
745						RF_ASSERT((*incomingLog)->next == NULL);
746						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
747						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
748						*incomingLog = NULL;
749					} else
750						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
751				else
752					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
753				/* Note: AcquireParityLog either returns a log
754				 * or enqueues currentItem */
755			}
756			if (!raidPtr->regionInfo[regionID].coreLog)
757				punt = RF_TRUE;	/* failed to find a core log */
758			else {
759				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
760				/* verify that the log has room for new
761				 * entries */
762				/* if log is full, dump it to disk and grab a
763				 * new log */
764				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
765					/* log is full, dump it to disk */
766					if (DumpParityLogToDisk(finish, item))
767						punt = RF_TRUE;	/* dump unsuccessful,
768								 * blocked on
769								 * reintegration */
770					else {
771						/* dump was successful */
772						if (incomingLog)
773							if (*incomingLog) {
774								RF_ASSERT((*incomingLog)->next == NULL);
775								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
776								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
777								*incomingLog = NULL;
778							} else
779								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
780						else
781							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
782						/* if a core log is not
783						 * available, must queue work
784						 * and return */
785						if (!raidPtr->regionInfo[regionID].coreLog)
786							punt = RF_TRUE;	/* blocked on log
787									 * availability */
788					}
789				}
790			}
791			/* if we didn't punt on this item, attempt to add a
792			 * sector to the core log */
793			if (!punt) {
794				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
795				/* at this point, we have a core log with
796				 * enough room for a sector */
797				/* copy a sector into the log */
798				log = raidPtr->regionInfo[regionID].coreLog;
799				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
800				logItem = log->numRecords++;
801				log->records[logItem].parityAddr = item->diskAddress;
802				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
803				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
804				log->records[logItem].parityAddr.numSector = 1;
805				log->records[logItem].operation = item->common->operation;
806				memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
807				item->diskAddress.numSector--;
808				item->diskAddress.startSector++;
809				if (item->diskAddress.numSector == 0)
810					done = RF_TRUE;
811			}
812		}
813
814		if (!punt) {
815			/* Processed this item completely, decrement count of
816			 * items to be processed. */
817			RF_ASSERT(item->diskAddress.numSector == 0);
818			RF_LOCK_MUTEX(item->common->mutex);
819			item->common->cnt--;
820			if (item->common->cnt == 0)
821				itemDone = RF_TRUE;
822			else
823				itemDone = RF_FALSE;
824			RF_UNLOCK_MUTEX(item->common->mutex);
825			if (itemDone) {
826				/* Finished processing all log data for this
827				 * IO Return structs to free list and invoke
828				 * wakeup function. */
829				timer = item->common->startTime;	/* grab initial value of
830									 * timer */
831				RF_ETIMER_STOP(timer);
832				RF_ETIMER_EVAL(timer);
833				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
834				if (rf_parityLogDebug)
835					printf("[waking process for region %d]\n", item->regionID);
836				wakeFunc = item->common->wakeFunc;
837				wakeArg = item->common->wakeArg;
838				FreeParityLogCommonData(item->common);
839				FreeParityLogData(item);
840				(wakeFunc) (wakeArg, 0);
841			} else
842				FreeParityLogData(item);
843		}
844	}
845	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
846	if (rf_parityLogDebug)
847		printf("[exiting ParityLogAppend]\n");
848	return (0);
849}
850
851
852void
853rf_EnableParityLogging(RF_Raid_t * raidPtr)
854{
855	int     regionID;
856
857	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
858		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
859		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
860		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
861	}
862	if (rf_parityLogDebug)
863		printf("[parity logging enabled]\n");
864}
865#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
866