rf_paritylog.c revision 1.5
1/*	$NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/* Code for manipulating in-core parity logs
30 *
31 */
32
33#include "rf_archs.h"
34
35#if RF_INCLUDE_PARITYLOGGING > 0
36
37/*
38 * Append-only log for recording parity "update" and "overwrite" records
39 */
40
41#include "rf_types.h"
42#include "rf_threadstuff.h"
43#include "rf_mcpair.h"
44#include "rf_raid.h"
45#include "rf_dag.h"
46#include "rf_dagfuncs.h"
47#include "rf_desc.h"
48#include "rf_layout.h"
49#include "rf_diskqueue.h"
50#include "rf_etimer.h"
51#include "rf_paritylog.h"
52#include "rf_general.h"
53#include "rf_map.h"
54#include "rf_paritylogging.h"
55#include "rf_paritylogDiskMgr.h"
56
57static RF_CommonLogData_t *
58AllocParityLogCommonData(RF_Raid_t * raidPtr)
59{
60	RF_CommonLogData_t *common = NULL;
61	int     rc;
62
63	/* Return a struct for holding common parity log information from the
64	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
65	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
66
67	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
68	if (raidPtr->parityLogDiskQueue.freeCommonList) {
69		common = raidPtr->parityLogDiskQueue.freeCommonList;
70		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
71		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72	} else {
73		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
74		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
75		rc = rf_mutex_init(&common->mutex);
76		if (rc) {
77			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
78			    __LINE__, rc);
79			RF_Free(common, sizeof(RF_CommonLogData_t));
80			common = NULL;
81		}
82	}
83	common->next = NULL;
84	return (common);
85}
86
87static void
88FreeParityLogCommonData(RF_CommonLogData_t * common)
89{
90	RF_Raid_t *raidPtr;
91
92	/* Insert a single struct for holding parity log information (data)
93	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
94	 * NON-BLOCKING */
95
96	raidPtr = common->raidPtr;
97	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
98	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
99	raidPtr->parityLogDiskQueue.freeCommonList = common;
100	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
101}
102
103static RF_ParityLogData_t *
104AllocParityLogData(RF_Raid_t * raidPtr)
105{
106	RF_ParityLogData_t *data = NULL;
107
108	/* Return a struct for holding parity log information from the free
109	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
110	 * call RF_Malloc to create a new structure. NON-BLOCKING */
111
112	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
113	if (raidPtr->parityLogDiskQueue.freeDataList) {
114		data = raidPtr->parityLogDiskQueue.freeDataList;
115		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
116		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117	} else {
118		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
119		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
120	}
121	data->next = NULL;
122	data->prev = NULL;
123	return (data);
124}
125
126
127static void
128FreeParityLogData(RF_ParityLogData_t * data)
129{
130	RF_ParityLogData_t *nextItem;
131	RF_Raid_t *raidPtr;
132
133	/* Insert a linked list of structs for holding parity log information
134	 * (data) into the free list (parityLogDiskQueue.freeList).
135	 * NON-BLOCKING */
136
137	raidPtr = data->common->raidPtr;
138	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
139	while (data) {
140		nextItem = data->next;
141		data->next = raidPtr->parityLogDiskQueue.freeDataList;
142		raidPtr->parityLogDiskQueue.freeDataList = data;
143		data = nextItem;
144	}
145	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
146}
147
148
149static void
150EnqueueParityLogData(
151    RF_ParityLogData_t * data,
152    RF_ParityLogData_t ** head,
153    RF_ParityLogData_t ** tail)
154{
155	RF_Raid_t *raidPtr;
156
157	/* Insert an in-core parity log (*data) into the head of a disk queue
158	 * (*head, *tail). NON-BLOCKING */
159
160	raidPtr = data->common->raidPtr;
161	if (rf_parityLogDebug)
162		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
163	RF_ASSERT(data->prev == NULL);
164	RF_ASSERT(data->next == NULL);
165	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
166	if (*head) {
167		/* insert into head of queue */
168		RF_ASSERT((*head)->prev == NULL);
169		RF_ASSERT((*tail)->next == NULL);
170		data->next = *head;
171		(*head)->prev = data;
172		*head = data;
173	} else {
174		/* insert into empty list */
175		RF_ASSERT(*head == NULL);
176		RF_ASSERT(*tail == NULL);
177		*head = data;
178		*tail = data;
179	}
180	RF_ASSERT((*head)->prev == NULL);
181	RF_ASSERT((*tail)->next == NULL);
182	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
183}
184
185static RF_ParityLogData_t *
186DequeueParityLogData(
187    RF_Raid_t * raidPtr,
188    RF_ParityLogData_t ** head,
189    RF_ParityLogData_t ** tail,
190    int ignoreLocks)
191{
192	RF_ParityLogData_t *data;
193
194	/* Remove and return an in-core parity log from the tail of a disk
195	 * queue (*head, *tail). NON-BLOCKING */
196
197	/* remove from tail, preserving FIFO order */
198	if (!ignoreLocks)
199		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
200	data = *tail;
201	if (data) {
202		if (*head == *tail) {
203			/* removing last item from queue */
204			*head = NULL;
205			*tail = NULL;
206		} else {
207			*tail = (*tail)->prev;
208			(*tail)->next = NULL;
209			RF_ASSERT((*head)->prev == NULL);
210			RF_ASSERT((*tail)->next == NULL);
211		}
212		data->next = NULL;
213		data->prev = NULL;
214		if (rf_parityLogDebug)
215			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
216	}
217	if (*head) {
218		RF_ASSERT((*head)->prev == NULL);
219		RF_ASSERT((*tail)->next == NULL);
220	}
221	if (!ignoreLocks)
222		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
223	return (data);
224}
225
226
227static void
228RequeueParityLogData(
229    RF_ParityLogData_t * data,
230    RF_ParityLogData_t ** head,
231    RF_ParityLogData_t ** tail)
232{
233	RF_Raid_t *raidPtr;
234
235	/* Insert an in-core parity log (*data) into the tail of a disk queue
236	 * (*head, *tail). NON-BLOCKING */
237
238	raidPtr = data->common->raidPtr;
239	RF_ASSERT(data);
240	if (rf_parityLogDebug)
241		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
242	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
243	if (*tail) {
244		/* append to tail of list */
245		data->prev = *tail;
246		data->next = NULL;
247		(*tail)->next = data;
248		*tail = data;
249	} else {
250		/* inserting into an empty list */
251		*head = data;
252		*tail = data;
253		(*head)->prev = NULL;
254		(*tail)->next = NULL;
255	}
256	RF_ASSERT((*head)->prev == NULL);
257	RF_ASSERT((*tail)->next == NULL);
258	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
259}
260
261RF_ParityLogData_t *
262rf_CreateParityLogData(
263    RF_ParityRecordType_t operation,
264    RF_PhysDiskAddr_t * pda,
265    caddr_t bufPtr,
266    RF_Raid_t * raidPtr,
267    int (*wakeFunc) (RF_DagNode_t * node, int status),
268    void *wakeArg,
269    RF_AccTraceEntry_t * tracerec,
270    RF_Etimer_t startTime)
271{
272	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
273	RF_CommonLogData_t *common;
274	RF_PhysDiskAddr_t *diskAddress;
275	int     boundary, offset = 0;
276
277	/* Return an initialized struct of info to be logged. Build one item
278	 * per physical disk address, one item per region.
279	 *
280	 * NON-BLOCKING */
281
282	diskAddress = pda;
283	common = AllocParityLogCommonData(raidPtr);
284	RF_ASSERT(common);
285
286	common->operation = operation;
287	common->bufPtr = bufPtr;
288	common->raidPtr = raidPtr;
289	common->wakeFunc = wakeFunc;
290	common->wakeArg = wakeArg;
291	common->tracerec = tracerec;
292	common->startTime = startTime;
293	common->cnt = 0;
294
295	if (rf_parityLogDebug)
296		printf("[entering CreateParityLogData]\n");
297	while (diskAddress) {
298		common->cnt++;
299		data = AllocParityLogData(raidPtr);
300		RF_ASSERT(data);
301		data->common = common;
302		data->next = NULL;
303		data->prev = NULL;
304		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
305		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
306			/* disk address does not cross a region boundary */
307			data->diskAddress = *diskAddress;
308			data->bufOffset = offset;
309			offset = offset + diskAddress->numSector;
310			EnqueueParityLogData(data, &resultHead, &resultTail);
311			/* adjust disk address */
312			diskAddress = diskAddress->next;
313		} else {
314			/* disk address crosses a region boundary */
315			/* find address where region is crossed */
316			boundary = 0;
317			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
318				boundary++;
319
320			/* enter data before the boundary */
321			data->diskAddress = *diskAddress;
322			data->diskAddress.numSector = boundary;
323			data->bufOffset = offset;
324			offset += boundary;
325			EnqueueParityLogData(data, &resultHead, &resultTail);
326			/* adjust disk address */
327			diskAddress->startSector += boundary;
328			diskAddress->numSector -= boundary;
329		}
330	}
331	if (rf_parityLogDebug)
332		printf("[leaving CreateParityLogData]\n");
333	return (resultHead);
334}
335
336
337RF_ParityLogData_t *
338rf_SearchAndDequeueParityLogData(
339    RF_Raid_t * raidPtr,
340    int regionID,
341    RF_ParityLogData_t ** head,
342    RF_ParityLogData_t ** tail,
343    int ignoreLocks)
344{
345	RF_ParityLogData_t *w;
346
347	/* Remove and return an in-core parity log from a specified region
348	 * (regionID). If a matching log is not found, return NULL.
349	 *
350	 * NON-BLOCKING. */
351
352	/* walk backward through a list, looking for an entry with a matching
353	 * region ID */
354	if (!ignoreLocks)
355		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
356	w = (*tail);
357	while (w) {
358		if (w->regionID == regionID) {
359			/* remove an element from the list */
360			if (w == *tail) {
361				if (*head == *tail) {
362					/* removing only element in the list */
363					*head = NULL;
364					*tail = NULL;
365				} else {
366					/* removing last item in the list */
367					*tail = (*tail)->prev;
368					(*tail)->next = NULL;
369					RF_ASSERT((*head)->prev == NULL);
370					RF_ASSERT((*tail)->next == NULL);
371				}
372			} else {
373				if (w == *head) {
374					/* removing first item in the list */
375					*head = (*head)->next;
376					(*head)->prev = NULL;
377					RF_ASSERT((*head)->prev == NULL);
378					RF_ASSERT((*tail)->next == NULL);
379				} else {
380					/* removing an item from the middle of
381					 * the list */
382					w->prev->next = w->next;
383					w->next->prev = w->prev;
384					RF_ASSERT((*head)->prev == NULL);
385					RF_ASSERT((*tail)->next == NULL);
386				}
387			}
388			w->prev = NULL;
389			w->next = NULL;
390			if (rf_parityLogDebug)
391				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
392			return (w);
393		} else
394			w = w->prev;
395	}
396	if (!ignoreLocks)
397		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
398	return (NULL);
399}
400
401static RF_ParityLogData_t *
402DequeueMatchingLogData(
403    RF_Raid_t * raidPtr,
404    RF_ParityLogData_t ** head,
405    RF_ParityLogData_t ** tail)
406{
407	RF_ParityLogData_t *logDataList, *logData;
408	int     regionID;
409
410	/* Remove and return an in-core parity log from the tail of a disk
411	 * queue (*head, *tail).  Then remove all matching (identical
412	 * regionIDs) logData and return as a linked list.
413	 *
414	 * NON-BLOCKING */
415
416	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
417	if (logDataList) {
418		regionID = logDataList->regionID;
419		logData = logDataList;
420		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
421		while (logData->next) {
422			logData = logData->next;
423			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
424		}
425	}
426	return (logDataList);
427}
428
429
430static RF_ParityLog_t *
431AcquireParityLog(
432    RF_ParityLogData_t * logData,
433    int finish)
434{
435	RF_ParityLog_t *log = NULL;
436	RF_Raid_t *raidPtr;
437
438	/* Grab a log buffer from the pool and return it. If no buffers are
439	 * available, return NULL. NON-BLOCKING */
440	raidPtr = logData->common->raidPtr;
441	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
442	if (raidPtr->parityLogPool.parityLogs) {
443		log = raidPtr->parityLogPool.parityLogs;
444		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
445		log->regionID = logData->regionID;
446		log->numRecords = 0;
447		log->next = NULL;
448		raidPtr->logsInUse++;
449		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
450	} else {
451		/* no logs available, so place ourselves on the queue of work
452		 * waiting on log buffers this is done while
453		 * parityLogPool.mutex is held, to ensure synchronization with
454		 * ReleaseParityLogs. */
455		if (rf_parityLogDebug)
456			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
457		if (finish)
458			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
459		else
460			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
461	}
462	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
463	return (log);
464}
465
466void
467rf_ReleaseParityLogs(
468    RF_Raid_t * raidPtr,
469    RF_ParityLog_t * firstLog)
470{
471	RF_ParityLogData_t *logDataList;
472	RF_ParityLog_t *log, *lastLog;
473	int     cnt;
474
475	/* Insert a linked list of parity logs (firstLog) to the free list
476	 * (parityLogPool.parityLogPool)
477	 *
478	 * NON-BLOCKING. */
479
480	RF_ASSERT(firstLog);
481
482	/* Before returning logs to global free list, service all requests
483	 * which are blocked on logs.  Holding mutexes for parityLogPool and
484	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
485	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
486	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
487	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
488	log = firstLog;
489	if (firstLog)
490		firstLog = firstLog->next;
491	log->numRecords = 0;
492	log->next = NULL;
493	while (logDataList && log) {
494		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
495		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
496		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
497		if (rf_parityLogDebug)
498			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
499		if (log == NULL) {
500			log = firstLog;
501			if (firstLog) {
502				firstLog = firstLog->next;
503				log->numRecords = 0;
504				log->next = NULL;
505			}
506		}
507		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
508		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
509		if (log)
510			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
511	}
512	/* return remaining logs to pool */
513	if (log) {
514		log->next = firstLog;
515		firstLog = log;
516	}
517	if (firstLog) {
518		lastLog = firstLog;
519		raidPtr->logsInUse--;
520		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
521		while (lastLog->next) {
522			lastLog = lastLog->next;
523			raidPtr->logsInUse--;
524			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
525		}
526		lastLog->next = raidPtr->parityLogPool.parityLogs;
527		raidPtr->parityLogPool.parityLogs = firstLog;
528		cnt = 0;
529		log = raidPtr->parityLogPool.parityLogs;
530		while (log) {
531			cnt++;
532			log = log->next;
533		}
534		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
535	}
536	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
537	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
538}
539
540static void
541ReintLog(
542    RF_Raid_t * raidPtr,
543    int regionID,
544    RF_ParityLog_t * log)
545{
546	RF_ASSERT(log);
547
548	/* Insert an in-core parity log (log) into the disk queue of
549	 * reintegration work.  Set the flag (reintInProgress) for the
550	 * specified region (regionID) to indicate that reintegration is in
551	 * progress for this region. NON-BLOCKING */
552
553	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
554	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
555									 * complete */
556
557	if (rf_parityLogDebug)
558		printf("[requesting reintegration of region %d]\n", log->regionID);
559	/* move record to reintegration queue */
560	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
561	log->next = raidPtr->parityLogDiskQueue.reintQueue;
562	raidPtr->parityLogDiskQueue.reintQueue = log;
563	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
564	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
565	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
566}
567
568static void
569FlushLog(
570    RF_Raid_t * raidPtr,
571    RF_ParityLog_t * log)
572{
573	/* insert a core log (log) into a list of logs
574	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
575	 * NON-BLOCKING */
576
577	RF_ASSERT(log);
578	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
579	RF_ASSERT(log->next == NULL);
580	/* move log to flush queue */
581	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
582	log->next = raidPtr->parityLogDiskQueue.flushQueue;
583	raidPtr->parityLogDiskQueue.flushQueue = log;
584	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
586}
587
588static int
589DumpParityLogToDisk(
590    int finish,
591    RF_ParityLogData_t * logData)
592{
593	int     i, diskCount, regionID = logData->regionID;
594	RF_ParityLog_t *log;
595	RF_Raid_t *raidPtr;
596
597	raidPtr = logData->common->raidPtr;
598
599	/* Move a core log to disk.  If the log disk is full, initiate
600	 * reintegration.
601	 *
602	 * Return (0) if we can enqueue the dump immediately, otherwise return
603	 * (1) to indicate we are blocked on reintegration and control of the
604	 * thread should be relinquished.
605	 *
606	 * Caller must hold regionInfo[regionID].mutex
607	 *
608	 * NON-BLOCKING */
609
610	if (rf_parityLogDebug)
611		printf("[dumping parity log to disk, region %d]\n", regionID);
612	log = raidPtr->regionInfo[regionID].coreLog;
613	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
614	RF_ASSERT(log->next == NULL);
615
616	/* if reintegration is in progress, must queue work */
617	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
618	if (raidPtr->regionInfo[regionID].reintInProgress) {
619		/* Can not proceed since this region is currently being
620		 * reintegrated. We can not block, so queue remaining work and
621		 * return */
622		if (rf_parityLogDebug)
623			printf("[region %d waiting on reintegration]\n", regionID);
624		/* XXX not sure about the use of finish - shouldn't this
625		 * always be "Enqueue"? */
626		if (finish)
627			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
628		else
629			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631		return (1);	/* relenquish control of this thread */
632	}
633	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
634	raidPtr->regionInfo[regionID].coreLog = NULL;
635	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
636		/* IMPORTANT!! this loop bound assumes region disk holds an
637		 * integral number of core logs */
638	{
639		/* update disk map for this region */
640		diskCount = raidPtr->regionInfo[regionID].diskCount;
641		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
642			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
643			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
644		}
645		log->diskOffset = diskCount;
646		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
647		FlushLog(raidPtr, log);
648	} else {
649		/* no room for log on disk, send it to disk manager and
650		 * request reintegration */
651		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
652		ReintLog(raidPtr, regionID, log);
653	}
654	if (rf_parityLogDebug)
655		printf("[finished dumping parity log to disk, region %d]\n", regionID);
656	return (0);
657}
658
659int
660rf_ParityLogAppend(
661    RF_ParityLogData_t * logData,
662    int finish,
663    RF_ParityLog_t ** incomingLog,
664    int clearReintFlag)
665{
666	int     regionID, logItem, itemDone;
667	RF_ParityLogData_t *item;
668	int     punt, done = RF_FALSE;
669	RF_ParityLog_t *log;
670	RF_Raid_t *raidPtr;
671	RF_Etimer_t timer;
672	int     (*wakeFunc) (RF_DagNode_t * node, int status);
673	void   *wakeArg;
674
675	/* Add parity to the appropriate log, one sector at a time. This
676	 * routine is called is called by dag functions ParityLogUpdateFunc
677	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
678	 *
679	 * Parity to be logged is contained in a linked-list (logData).  When
680	 * this routine returns, every sector in the list will be in one of
681	 * three places: 1) entered into the parity log 2) queued, waiting on
682	 * reintegration 3) queued, waiting on a core log
683	 *
684	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
685	 * Later, as conditions which required the block are removed, the work
686	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
687	 *
688	 * NON-BLOCKING */
689
690	raidPtr = logData->common->raidPtr;
691	/* lock the region for the first item in logData */
692	RF_ASSERT(logData != NULL);
693	regionID = logData->regionID;
694	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
695	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
696
697	if (clearReintFlag) {
698		/* Enable flushing for this region.  Holding both locks
699		 * provides a synchronization barrier with DumpParityLogToDisk */
700		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
701		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
702		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
703		raidPtr->regionInfo[regionID].diskCount = 0;
704		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
705		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
706										 * enabled */
707		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
708	}
709	/* process each item in logData */
710	while (logData) {
711		/* remove an item from logData */
712		item = logData;
713		logData = logData->next;
714		item->next = NULL;
715		item->prev = NULL;
716
717		if (rf_parityLogDebug)
718			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
719
720		/* see if we moved to a new region */
721		if (regionID != item->regionID) {
722			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
723			regionID = item->regionID;
724			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
726		}
727		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
728				 * can happen in one of two ways: 1) no core
729				 * log (AcquireParityLog) 2) waiting on
730				 * reintegration (DumpParityLogToDisk) If punt
731				 * is RF_TRUE, the dataItem was queued, so
732				 * skip to next item. */
733
734		/* process item, one sector at a time, until all sectors
735		 * processed or we punt */
736		if (item->diskAddress.numSector > 0)
737			done = RF_FALSE;
738		else
739			RF_ASSERT(0);
740		while (!punt && !done) {
741			/* verify that a core log exists for this region */
742			if (!raidPtr->regionInfo[regionID].coreLog) {
743				/* Attempt to acquire a parity log. If
744				 * acquisition fails, queue remaining work in
745				 * data item and move to nextItem. */
746				if (incomingLog)
747					if (*incomingLog) {
748						RF_ASSERT((*incomingLog)->next == NULL);
749						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
750						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
751						*incomingLog = NULL;
752					} else
753						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
754				else
755					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756				/* Note: AcquireParityLog either returns a log
757				 * or enqueues currentItem */
758			}
759			if (!raidPtr->regionInfo[regionID].coreLog)
760				punt = RF_TRUE;	/* failed to find a core log */
761			else {
762				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
763				/* verify that the log has room for new
764				 * entries */
765				/* if log is full, dump it to disk and grab a
766				 * new log */
767				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
768					/* log is full, dump it to disk */
769					if (DumpParityLogToDisk(finish, item))
770						punt = RF_TRUE;	/* dump unsuccessful,
771								 * blocked on
772								 * reintegration */
773					else {
774						/* dump was successful */
775						if (incomingLog)
776							if (*incomingLog) {
777								RF_ASSERT((*incomingLog)->next == NULL);
778								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
779								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
780								*incomingLog = NULL;
781							} else
782								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
783						else
784							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785						/* if a core log is not
786						 * available, must queue work
787						 * and return */
788						if (!raidPtr->regionInfo[regionID].coreLog)
789							punt = RF_TRUE;	/* blocked on log
790									 * availability */
791					}
792				}
793			}
794			/* if we didn't punt on this item, attempt to add a
795			 * sector to the core log */
796			if (!punt) {
797				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
798				/* at this point, we have a core log with
799				 * enough room for a sector */
800				/* copy a sector into the log */
801				log = raidPtr->regionInfo[regionID].coreLog;
802				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
803				logItem = log->numRecords++;
804				log->records[logItem].parityAddr = item->diskAddress;
805				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
806				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
807				log->records[logItem].parityAddr.numSector = 1;
808				log->records[logItem].operation = item->common->operation;
809				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
810				item->diskAddress.numSector--;
811				item->diskAddress.startSector++;
812				if (item->diskAddress.numSector == 0)
813					done = RF_TRUE;
814			}
815		}
816
817		if (!punt) {
818			/* Processed this item completely, decrement count of
819			 * items to be processed. */
820			RF_ASSERT(item->diskAddress.numSector == 0);
821			RF_LOCK_MUTEX(item->common->mutex);
822			item->common->cnt--;
823			if (item->common->cnt == 0)
824				itemDone = RF_TRUE;
825			else
826				itemDone = RF_FALSE;
827			RF_UNLOCK_MUTEX(item->common->mutex);
828			if (itemDone) {
829				/* Finished processing all log data for this
830				 * IO Return structs to free list and invoke
831				 * wakeup function. */
832				timer = item->common->startTime;	/* grab initial value of
833									 * timer */
834				RF_ETIMER_STOP(timer);
835				RF_ETIMER_EVAL(timer);
836				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
837				if (rf_parityLogDebug)
838					printf("[waking process for region %d]\n", item->regionID);
839				wakeFunc = item->common->wakeFunc;
840				wakeArg = item->common->wakeArg;
841				FreeParityLogCommonData(item->common);
842				FreeParityLogData(item);
843				(wakeFunc) (wakeArg, 0);
844			} else
845				FreeParityLogData(item);
846		}
847	}
848	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
849	if (rf_parityLogDebug)
850		printf("[exiting ParityLogAppend]\n");
851	return (0);
852}
853
854
855void
856rf_EnableParityLogging(RF_Raid_t * raidPtr)
857{
858	int     regionID;
859
860	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
861		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
862		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
863		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864	}
865	if (rf_parityLogDebug)
866		printf("[parity logging enabled]\n");
867}
868#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
869