rf_paritylog.c revision 1.11
1/*	$NetBSD: rf_paritylog.c,v 1.11 2005/12/11 12:23:37 christos Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/* Code for manipulating in-core parity logs
30 *
31 */
32
33#include <sys/cdefs.h>
34__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.11 2005/12/11 12:23:37 christos Exp $");
35
36#include "rf_archs.h"
37
38#if RF_INCLUDE_PARITYLOGGING > 0
39
40/*
41 * Append-only log for recording parity "update" and "overwrite" records
42 */
43
44#include <dev/raidframe/raidframevar.h>
45
46#include "rf_threadstuff.h"
47#include "rf_mcpair.h"
48#include "rf_raid.h"
49#include "rf_dag.h"
50#include "rf_dagfuncs.h"
51#include "rf_desc.h"
52#include "rf_layout.h"
53#include "rf_diskqueue.h"
54#include "rf_etimer.h"
55#include "rf_paritylog.h"
56#include "rf_general.h"
57#include "rf_map.h"
58#include "rf_paritylogging.h"
59#include "rf_paritylogDiskMgr.h"
60
61static RF_CommonLogData_t *
62AllocParityLogCommonData(RF_Raid_t * raidPtr)
63{
64	RF_CommonLogData_t *common = NULL;
65	int     rc;
66
67	/* Return a struct for holding common parity log information from the
68	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
69	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
70
71	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72	if (raidPtr->parityLogDiskQueue.freeCommonList) {
73		common = raidPtr->parityLogDiskQueue.freeCommonList;
74		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
75		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76	} else {
77		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
78		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
79		rc = rf_mutex_init(&common->mutex);
80		if (rc) {
81			rf_print_unable_to_init_mutex(__FILE__, __LINE__, rc);
82			RF_Free(common, sizeof(RF_CommonLogData_t));
83			common = NULL;
84		}
85	}
86	common->next = NULL;
87	return (common);
88}
89
90static void
91FreeParityLogCommonData(RF_CommonLogData_t * common)
92{
93	RF_Raid_t *raidPtr;
94
95	/* Insert a single struct for holding parity log information (data)
96	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
97	 * NON-BLOCKING */
98
99	raidPtr = common->raidPtr;
100	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
101	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
102	raidPtr->parityLogDiskQueue.freeCommonList = common;
103	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
104}
105
106static RF_ParityLogData_t *
107AllocParityLogData(RF_Raid_t * raidPtr)
108{
109	RF_ParityLogData_t *data = NULL;
110
111	/* Return a struct for holding parity log information from the free
112	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
113	 * call RF_Malloc to create a new structure. NON-BLOCKING */
114
115	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
116	if (raidPtr->parityLogDiskQueue.freeDataList) {
117		data = raidPtr->parityLogDiskQueue.freeDataList;
118		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
119		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
120	} else {
121		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
122		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
123	}
124	data->next = NULL;
125	data->prev = NULL;
126	return (data);
127}
128
129
130static void
131FreeParityLogData(RF_ParityLogData_t * data)
132{
133	RF_ParityLogData_t *nextItem;
134	RF_Raid_t *raidPtr;
135
136	/* Insert a linked list of structs for holding parity log information
137	 * (data) into the free list (parityLogDiskQueue.freeList).
138	 * NON-BLOCKING */
139
140	raidPtr = data->common->raidPtr;
141	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
142	while (data) {
143		nextItem = data->next;
144		data->next = raidPtr->parityLogDiskQueue.freeDataList;
145		raidPtr->parityLogDiskQueue.freeDataList = data;
146		data = nextItem;
147	}
148	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
149}
150
151
152static void
153EnqueueParityLogData(
154    RF_ParityLogData_t * data,
155    RF_ParityLogData_t ** head,
156    RF_ParityLogData_t ** tail)
157{
158	RF_Raid_t *raidPtr;
159
160	/* Insert an in-core parity log (*data) into the head of a disk queue
161	 * (*head, *tail). NON-BLOCKING */
162
163	raidPtr = data->common->raidPtr;
164	if (rf_parityLogDebug)
165		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
166	RF_ASSERT(data->prev == NULL);
167	RF_ASSERT(data->next == NULL);
168	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
169	if (*head) {
170		/* insert into head of queue */
171		RF_ASSERT((*head)->prev == NULL);
172		RF_ASSERT((*tail)->next == NULL);
173		data->next = *head;
174		(*head)->prev = data;
175		*head = data;
176	} else {
177		/* insert into empty list */
178		RF_ASSERT(*head == NULL);
179		RF_ASSERT(*tail == NULL);
180		*head = data;
181		*tail = data;
182	}
183	RF_ASSERT((*head)->prev == NULL);
184	RF_ASSERT((*tail)->next == NULL);
185	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
186}
187
188static RF_ParityLogData_t *
189DequeueParityLogData(
190    RF_Raid_t * raidPtr,
191    RF_ParityLogData_t ** head,
192    RF_ParityLogData_t ** tail,
193    int ignoreLocks)
194{
195	RF_ParityLogData_t *data;
196
197	/* Remove and return an in-core parity log from the tail of a disk
198	 * queue (*head, *tail). NON-BLOCKING */
199
200	/* remove from tail, preserving FIFO order */
201	if (!ignoreLocks)
202		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
203	data = *tail;
204	if (data) {
205		if (*head == *tail) {
206			/* removing last item from queue */
207			*head = NULL;
208			*tail = NULL;
209		} else {
210			*tail = (*tail)->prev;
211			(*tail)->next = NULL;
212			RF_ASSERT((*head)->prev == NULL);
213			RF_ASSERT((*tail)->next == NULL);
214		}
215		data->next = NULL;
216		data->prev = NULL;
217		if (rf_parityLogDebug)
218			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
219	}
220	if (*head) {
221		RF_ASSERT((*head)->prev == NULL);
222		RF_ASSERT((*tail)->next == NULL);
223	}
224	if (!ignoreLocks)
225		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
226	return (data);
227}
228
229
230static void
231RequeueParityLogData(
232    RF_ParityLogData_t * data,
233    RF_ParityLogData_t ** head,
234    RF_ParityLogData_t ** tail)
235{
236	RF_Raid_t *raidPtr;
237
238	/* Insert an in-core parity log (*data) into the tail of a disk queue
239	 * (*head, *tail). NON-BLOCKING */
240
241	raidPtr = data->common->raidPtr;
242	RF_ASSERT(data);
243	if (rf_parityLogDebug)
244		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
245	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
246	if (*tail) {
247		/* append to tail of list */
248		data->prev = *tail;
249		data->next = NULL;
250		(*tail)->next = data;
251		*tail = data;
252	} else {
253		/* inserting into an empty list */
254		*head = data;
255		*tail = data;
256		(*head)->prev = NULL;
257		(*tail)->next = NULL;
258	}
259	RF_ASSERT((*head)->prev == NULL);
260	RF_ASSERT((*tail)->next == NULL);
261	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
262}
263
264RF_ParityLogData_t *
265rf_CreateParityLogData(
266    RF_ParityRecordType_t operation,
267    RF_PhysDiskAddr_t * pda,
268    caddr_t bufPtr,
269    RF_Raid_t * raidPtr,
270    int (*wakeFunc) (RF_DagNode_t * node, int status),
271    void *wakeArg,
272    RF_AccTraceEntry_t * tracerec,
273    RF_Etimer_t startTime)
274{
275	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
276	RF_CommonLogData_t *common;
277	RF_PhysDiskAddr_t *diskAddress;
278	int     boundary, offset = 0;
279
280	/* Return an initialized struct of info to be logged. Build one item
281	 * per physical disk address, one item per region.
282	 *
283	 * NON-BLOCKING */
284
285	diskAddress = pda;
286	common = AllocParityLogCommonData(raidPtr);
287	RF_ASSERT(common);
288
289	common->operation = operation;
290	common->bufPtr = bufPtr;
291	common->raidPtr = raidPtr;
292	common->wakeFunc = wakeFunc;
293	common->wakeArg = wakeArg;
294	common->tracerec = tracerec;
295	common->startTime = startTime;
296	common->cnt = 0;
297
298	if (rf_parityLogDebug)
299		printf("[entering CreateParityLogData]\n");
300	while (diskAddress) {
301		common->cnt++;
302		data = AllocParityLogData(raidPtr);
303		RF_ASSERT(data);
304		data->common = common;
305		data->next = NULL;
306		data->prev = NULL;
307		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
308		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
309			/* disk address does not cross a region boundary */
310			data->diskAddress = *diskAddress;
311			data->bufOffset = offset;
312			offset = offset + diskAddress->numSector;
313			EnqueueParityLogData(data, &resultHead, &resultTail);
314			/* adjust disk address */
315			diskAddress = diskAddress->next;
316		} else {
317			/* disk address crosses a region boundary */
318			/* find address where region is crossed */
319			boundary = 0;
320			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
321				boundary++;
322
323			/* enter data before the boundary */
324			data->diskAddress = *diskAddress;
325			data->diskAddress.numSector = boundary;
326			data->bufOffset = offset;
327			offset += boundary;
328			EnqueueParityLogData(data, &resultHead, &resultTail);
329			/* adjust disk address */
330			diskAddress->startSector += boundary;
331			diskAddress->numSector -= boundary;
332		}
333	}
334	if (rf_parityLogDebug)
335		printf("[leaving CreateParityLogData]\n");
336	return (resultHead);
337}
338
339
340RF_ParityLogData_t *
341rf_SearchAndDequeueParityLogData(
342    RF_Raid_t * raidPtr,
343    int regionID,
344    RF_ParityLogData_t ** head,
345    RF_ParityLogData_t ** tail,
346    int ignoreLocks)
347{
348	RF_ParityLogData_t *w;
349
350	/* Remove and return an in-core parity log from a specified region
351	 * (regionID). If a matching log is not found, return NULL.
352	 *
353	 * NON-BLOCKING. */
354
355	/* walk backward through a list, looking for an entry with a matching
356	 * region ID */
357	if (!ignoreLocks)
358		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
359	w = (*tail);
360	while (w) {
361		if (w->regionID == regionID) {
362			/* remove an element from the list */
363			if (w == *tail) {
364				if (*head == *tail) {
365					/* removing only element in the list */
366					*head = NULL;
367					*tail = NULL;
368				} else {
369					/* removing last item in the list */
370					*tail = (*tail)->prev;
371					(*tail)->next = NULL;
372					RF_ASSERT((*head)->prev == NULL);
373					RF_ASSERT((*tail)->next == NULL);
374				}
375			} else {
376				if (w == *head) {
377					/* removing first item in the list */
378					*head = (*head)->next;
379					(*head)->prev = NULL;
380					RF_ASSERT((*head)->prev == NULL);
381					RF_ASSERT((*tail)->next == NULL);
382				} else {
383					/* removing an item from the middle of
384					 * the list */
385					w->prev->next = w->next;
386					w->next->prev = w->prev;
387					RF_ASSERT((*head)->prev == NULL);
388					RF_ASSERT((*tail)->next == NULL);
389				}
390			}
391			w->prev = NULL;
392			w->next = NULL;
393			if (rf_parityLogDebug)
394				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
395			return (w);
396		} else
397			w = w->prev;
398	}
399	if (!ignoreLocks)
400		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
401	return (NULL);
402}
403
404static RF_ParityLogData_t *
405DequeueMatchingLogData(
406    RF_Raid_t * raidPtr,
407    RF_ParityLogData_t ** head,
408    RF_ParityLogData_t ** tail)
409{
410	RF_ParityLogData_t *logDataList, *logData;
411	int     regionID;
412
413	/* Remove and return an in-core parity log from the tail of a disk
414	 * queue (*head, *tail).  Then remove all matching (identical
415	 * regionIDs) logData and return as a linked list.
416	 *
417	 * NON-BLOCKING */
418
419	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
420	if (logDataList) {
421		regionID = logDataList->regionID;
422		logData = logDataList;
423		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
424		while (logData->next) {
425			logData = logData->next;
426			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
427		}
428	}
429	return (logDataList);
430}
431
432
433static RF_ParityLog_t *
434AcquireParityLog(
435    RF_ParityLogData_t * logData,
436    int finish)
437{
438	RF_ParityLog_t *log = NULL;
439	RF_Raid_t *raidPtr;
440
441	/* Grab a log buffer from the pool and return it. If no buffers are
442	 * available, return NULL. NON-BLOCKING */
443	raidPtr = logData->common->raidPtr;
444	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
445	if (raidPtr->parityLogPool.parityLogs) {
446		log = raidPtr->parityLogPool.parityLogs;
447		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
448		log->regionID = logData->regionID;
449		log->numRecords = 0;
450		log->next = NULL;
451		raidPtr->logsInUse++;
452		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
453	} else {
454		/* no logs available, so place ourselves on the queue of work
455		 * waiting on log buffers this is done while
456		 * parityLogPool.mutex is held, to ensure synchronization with
457		 * ReleaseParityLogs. */
458		if (rf_parityLogDebug)
459			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
460		if (finish)
461			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
462		else
463			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
464	}
465	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
466	return (log);
467}
468
469void
470rf_ReleaseParityLogs(
471    RF_Raid_t * raidPtr,
472    RF_ParityLog_t * firstLog)
473{
474	RF_ParityLogData_t *logDataList;
475	RF_ParityLog_t *log, *lastLog;
476	int     cnt;
477
478	/* Insert a linked list of parity logs (firstLog) to the free list
479	 * (parityLogPool.parityLogPool)
480	 *
481	 * NON-BLOCKING. */
482
483	RF_ASSERT(firstLog);
484
485	/* Before returning logs to global free list, service all requests
486	 * which are blocked on logs.  Holding mutexes for parityLogPool and
487	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
488	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
489	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
490	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
491	log = firstLog;
492	if (firstLog)
493		firstLog = firstLog->next;
494	log->numRecords = 0;
495	log->next = NULL;
496	while (logDataList && log) {
497		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
498		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
499		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
500		if (rf_parityLogDebug)
501			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
502		if (log == NULL) {
503			log = firstLog;
504			if (firstLog) {
505				firstLog = firstLog->next;
506				log->numRecords = 0;
507				log->next = NULL;
508			}
509		}
510		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
511		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
512		if (log)
513			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
514	}
515	/* return remaining logs to pool */
516	if (log) {
517		log->next = firstLog;
518		firstLog = log;
519	}
520	if (firstLog) {
521		lastLog = firstLog;
522		raidPtr->logsInUse--;
523		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
524		while (lastLog->next) {
525			lastLog = lastLog->next;
526			raidPtr->logsInUse--;
527			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
528		}
529		lastLog->next = raidPtr->parityLogPool.parityLogs;
530		raidPtr->parityLogPool.parityLogs = firstLog;
531		cnt = 0;
532		log = raidPtr->parityLogPool.parityLogs;
533		while (log) {
534			cnt++;
535			log = log->next;
536		}
537		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
538	}
539	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
540	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
541}
542
543static void
544ReintLog(
545    RF_Raid_t * raidPtr,
546    int regionID,
547    RF_ParityLog_t * log)
548{
549	RF_ASSERT(log);
550
551	/* Insert an in-core parity log (log) into the disk queue of
552	 * reintegration work.  Set the flag (reintInProgress) for the
553	 * specified region (regionID) to indicate that reintegration is in
554	 * progress for this region. NON-BLOCKING */
555
556	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
557	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
558									 * complete */
559
560	if (rf_parityLogDebug)
561		printf("[requesting reintegration of region %d]\n", log->regionID);
562	/* move record to reintegration queue */
563	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
564	log->next = raidPtr->parityLogDiskQueue.reintQueue;
565	raidPtr->parityLogDiskQueue.reintQueue = log;
566	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
567	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
568	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
569}
570
571static void
572FlushLog(
573    RF_Raid_t * raidPtr,
574    RF_ParityLog_t * log)
575{
576	/* insert a core log (log) into a list of logs
577	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
578	 * NON-BLOCKING */
579
580	RF_ASSERT(log);
581	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
582	RF_ASSERT(log->next == NULL);
583	/* move log to flush queue */
584	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585	log->next = raidPtr->parityLogDiskQueue.flushQueue;
586	raidPtr->parityLogDiskQueue.flushQueue = log;
587	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
588	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
589}
590
591static int
592DumpParityLogToDisk(
593    int finish,
594    RF_ParityLogData_t * logData)
595{
596	int     i, diskCount, regionID = logData->regionID;
597	RF_ParityLog_t *log;
598	RF_Raid_t *raidPtr;
599
600	raidPtr = logData->common->raidPtr;
601
602	/* Move a core log to disk.  If the log disk is full, initiate
603	 * reintegration.
604	 *
605	 * Return (0) if we can enqueue the dump immediately, otherwise return
606	 * (1) to indicate we are blocked on reintegration and control of the
607	 * thread should be relinquished.
608	 *
609	 * Caller must hold regionInfo[regionID].mutex
610	 *
611	 * NON-BLOCKING */
612
613	if (rf_parityLogDebug)
614		printf("[dumping parity log to disk, region %d]\n", regionID);
615	log = raidPtr->regionInfo[regionID].coreLog;
616	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
617	RF_ASSERT(log->next == NULL);
618
619	/* if reintegration is in progress, must queue work */
620	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
621	if (raidPtr->regionInfo[regionID].reintInProgress) {
622		/* Can not proceed since this region is currently being
623		 * reintegrated. We can not block, so queue remaining work and
624		 * return */
625		if (rf_parityLogDebug)
626			printf("[region %d waiting on reintegration]\n", regionID);
627		/* XXX not sure about the use of finish - shouldn't this
628		 * always be "Enqueue"? */
629		if (finish)
630			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
631		else
632			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
633		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
634		return (1);	/* relenquish control of this thread */
635	}
636	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
637	raidPtr->regionInfo[regionID].coreLog = NULL;
638	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
639		/* IMPORTANT!! this loop bound assumes region disk holds an
640		 * integral number of core logs */
641	{
642		/* update disk map for this region */
643		diskCount = raidPtr->regionInfo[regionID].diskCount;
644		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
645			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
646			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
647		}
648		log->diskOffset = diskCount;
649		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
650		FlushLog(raidPtr, log);
651	} else {
652		/* no room for log on disk, send it to disk manager and
653		 * request reintegration */
654		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
655		ReintLog(raidPtr, regionID, log);
656	}
657	if (rf_parityLogDebug)
658		printf("[finished dumping parity log to disk, region %d]\n", regionID);
659	return (0);
660}
661
662int
663rf_ParityLogAppend(
664    RF_ParityLogData_t * logData,
665    int finish,
666    RF_ParityLog_t ** incomingLog,
667    int clearReintFlag)
668{
669	int     regionID, logItem, itemDone;
670	RF_ParityLogData_t *item;
671	int     punt, done = RF_FALSE;
672	RF_ParityLog_t *log;
673	RF_Raid_t *raidPtr;
674	RF_Etimer_t timer;
675	int     (*wakeFunc) (RF_DagNode_t * node, int status);
676	void   *wakeArg;
677
678	/* Add parity to the appropriate log, one sector at a time. This
679	 * routine is called is called by dag functions ParityLogUpdateFunc
680	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
681	 *
682	 * Parity to be logged is contained in a linked-list (logData).  When
683	 * this routine returns, every sector in the list will be in one of
684	 * three places: 1) entered into the parity log 2) queued, waiting on
685	 * reintegration 3) queued, waiting on a core log
686	 *
687	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
688	 * Later, as conditions which required the block are removed, the work
689	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
690	 *
691	 * NON-BLOCKING */
692
693	raidPtr = logData->common->raidPtr;
694	/* lock the region for the first item in logData */
695	RF_ASSERT(logData != NULL);
696	regionID = logData->regionID;
697	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
698	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
699
700	if (clearReintFlag) {
701		/* Enable flushing for this region.  Holding both locks
702		 * provides a synchronization barrier with DumpParityLogToDisk */
703		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
704		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
705		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
706		raidPtr->regionInfo[regionID].diskCount = 0;
707		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
708		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
709										 * enabled */
710		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
711	}
712	/* process each item in logData */
713	while (logData) {
714		/* remove an item from logData */
715		item = logData;
716		logData = logData->next;
717		item->next = NULL;
718		item->prev = NULL;
719
720		if (rf_parityLogDebug)
721			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
722
723		/* see if we moved to a new region */
724		if (regionID != item->regionID) {
725			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
726			regionID = item->regionID;
727			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
728			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
729		}
730		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
731				 * can happen in one of two ways: 1) no core
732				 * log (AcquireParityLog) 2) waiting on
733				 * reintegration (DumpParityLogToDisk) If punt
734				 * is RF_TRUE, the dataItem was queued, so
735				 * skip to next item. */
736
737		/* process item, one sector at a time, until all sectors
738		 * processed or we punt */
739		if (item->diskAddress.numSector > 0)
740			done = RF_FALSE;
741		else
742			RF_ASSERT(0);
743		while (!punt && !done) {
744			/* verify that a core log exists for this region */
745			if (!raidPtr->regionInfo[regionID].coreLog) {
746				/* Attempt to acquire a parity log. If
747				 * acquisition fails, queue remaining work in
748				 * data item and move to nextItem. */
749				if (incomingLog)
750					if (*incomingLog) {
751						RF_ASSERT((*incomingLog)->next == NULL);
752						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
753						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
754						*incomingLog = NULL;
755					} else
756						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
757				else
758					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
759				/* Note: AcquireParityLog either returns a log
760				 * or enqueues currentItem */
761			}
762			if (!raidPtr->regionInfo[regionID].coreLog)
763				punt = RF_TRUE;	/* failed to find a core log */
764			else {
765				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
766				/* verify that the log has room for new
767				 * entries */
768				/* if log is full, dump it to disk and grab a
769				 * new log */
770				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
771					/* log is full, dump it to disk */
772					if (DumpParityLogToDisk(finish, item))
773						punt = RF_TRUE;	/* dump unsuccessful,
774								 * blocked on
775								 * reintegration */
776					else {
777						/* dump was successful */
778						if (incomingLog)
779							if (*incomingLog) {
780								RF_ASSERT((*incomingLog)->next == NULL);
781								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
782								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
783								*incomingLog = NULL;
784							} else
785								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
786						else
787							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
788						/* if a core log is not
789						 * available, must queue work
790						 * and return */
791						if (!raidPtr->regionInfo[regionID].coreLog)
792							punt = RF_TRUE;	/* blocked on log
793									 * availability */
794					}
795				}
796			}
797			/* if we didn't punt on this item, attempt to add a
798			 * sector to the core log */
799			if (!punt) {
800				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
801				/* at this point, we have a core log with
802				 * enough room for a sector */
803				/* copy a sector into the log */
804				log = raidPtr->regionInfo[regionID].coreLog;
805				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
806				logItem = log->numRecords++;
807				log->records[logItem].parityAddr = item->diskAddress;
808				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
809				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
810				log->records[logItem].parityAddr.numSector = 1;
811				log->records[logItem].operation = item->common->operation;
812				memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
813				item->diskAddress.numSector--;
814				item->diskAddress.startSector++;
815				if (item->diskAddress.numSector == 0)
816					done = RF_TRUE;
817			}
818		}
819
820		if (!punt) {
821			/* Processed this item completely, decrement count of
822			 * items to be processed. */
823			RF_ASSERT(item->diskAddress.numSector == 0);
824			RF_LOCK_MUTEX(item->common->mutex);
825			item->common->cnt--;
826			if (item->common->cnt == 0)
827				itemDone = RF_TRUE;
828			else
829				itemDone = RF_FALSE;
830			RF_UNLOCK_MUTEX(item->common->mutex);
831			if (itemDone) {
832				/* Finished processing all log data for this
833				 * IO Return structs to free list and invoke
834				 * wakeup function. */
835				timer = item->common->startTime;	/* grab initial value of
836									 * timer */
837				RF_ETIMER_STOP(timer);
838				RF_ETIMER_EVAL(timer);
839				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
840				if (rf_parityLogDebug)
841					printf("[waking process for region %d]\n", item->regionID);
842				wakeFunc = item->common->wakeFunc;
843				wakeArg = item->common->wakeArg;
844				FreeParityLogCommonData(item->common);
845				FreeParityLogData(item);
846				(wakeFunc) (wakeArg, 0);
847			} else
848				FreeParityLogData(item);
849		}
850	}
851	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
852	if (rf_parityLogDebug)
853		printf("[exiting ParityLogAppend]\n");
854	return (0);
855}
856
857
858void
859rf_EnableParityLogging(RF_Raid_t * raidPtr)
860{
861	int     regionID;
862
863	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
864		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
865		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
866		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
867	}
868	if (rf_parityLogDebug)
869		printf("[parity logging enabled]\n");
870}
871#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
872