rf_paritylog.c revision 1.3
1/*	$NetBSD: rf_paritylog.c,v 1.3 1999/02/05 00:06:13 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
/* Code for manipulating in-core parity logs
 *
 */
32
33#include "rf_archs.h"
34
35#if RF_INCLUDE_PARITYLOGGING > 0
36
/*
 * Append-only log for recording parity "update" and "overwrite" records
 */
40
41#include "rf_types.h"
42#include "rf_threadstuff.h"
43#include "rf_mcpair.h"
44#include "rf_raid.h"
45#include "rf_dag.h"
46#include "rf_dagfuncs.h"
47#include "rf_desc.h"
48#include "rf_layout.h"
49#include "rf_diskqueue.h"
50#include "rf_etimer.h"
51#include "rf_paritylog.h"
52#include "rf_general.h"
53#include "rf_threadid.h"
54#include "rf_map.h"
55#include "rf_paritylogging.h"
56#include "rf_paritylogDiskMgr.h"
57#include "rf_sys.h"
58
59static RF_CommonLogData_t *
60AllocParityLogCommonData(RF_Raid_t * raidPtr)
61{
62	RF_CommonLogData_t *common = NULL;
63	int     rc;
64
65	/* Return a struct for holding common parity log information from the
66	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
67	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
68
69	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
70	if (raidPtr->parityLogDiskQueue.freeCommonList) {
71		common = raidPtr->parityLogDiskQueue.freeCommonList;
72		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
73		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
74	} else {
75		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
77		rc = rf_mutex_init(&common->mutex);
78		if (rc) {
79			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
80			    __LINE__, rc);
81			RF_Free(common, sizeof(RF_CommonLogData_t));
82			common = NULL;
83		}
84	}
85	common->next = NULL;
86	return (common);
87}
88
89static void
90FreeParityLogCommonData(RF_CommonLogData_t * common)
91{
92	RF_Raid_t *raidPtr;
93
94	/* Insert a single struct for holding parity log information (data)
95	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
96	 * NON-BLOCKING */
97
98	raidPtr = common->raidPtr;
99	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
100	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
101	raidPtr->parityLogDiskQueue.freeCommonList = common;
102	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
103}
104
105static RF_ParityLogData_t *
106AllocParityLogData(RF_Raid_t * raidPtr)
107{
108	RF_ParityLogData_t *data = NULL;
109
110	/* Return a struct for holding parity log information from the free
111	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
112	 * call RF_Malloc to create a new structure. NON-BLOCKING */
113
114	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
115	if (raidPtr->parityLogDiskQueue.freeDataList) {
116		data = raidPtr->parityLogDiskQueue.freeDataList;
117		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
118		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
119	} else {
120		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
121		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
122	}
123	data->next = NULL;
124	data->prev = NULL;
125	return (data);
126}
127
128
129static void
130FreeParityLogData(RF_ParityLogData_t * data)
131{
132	RF_ParityLogData_t *nextItem;
133	RF_Raid_t *raidPtr;
134
135	/* Insert a linked list of structs for holding parity log information
136	 * (data) into the free list (parityLogDiskQueue.freeList).
137	 * NON-BLOCKING */
138
139	raidPtr = data->common->raidPtr;
140	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
141	while (data) {
142		nextItem = data->next;
143		data->next = raidPtr->parityLogDiskQueue.freeDataList;
144		raidPtr->parityLogDiskQueue.freeDataList = data;
145		data = nextItem;
146	}
147	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
148}
149
150
151static void
152EnqueueParityLogData(
153    RF_ParityLogData_t * data,
154    RF_ParityLogData_t ** head,
155    RF_ParityLogData_t ** tail)
156{
157	RF_Raid_t *raidPtr;
158
159	/* Insert an in-core parity log (*data) into the head of a disk queue
160	 * (*head, *tail). NON-BLOCKING */
161
162	raidPtr = data->common->raidPtr;
163	if (rf_parityLogDebug)
164		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
165	RF_ASSERT(data->prev == NULL);
166	RF_ASSERT(data->next == NULL);
167	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
168	if (*head) {
169		/* insert into head of queue */
170		RF_ASSERT((*head)->prev == NULL);
171		RF_ASSERT((*tail)->next == NULL);
172		data->next = *head;
173		(*head)->prev = data;
174		*head = data;
175	} else {
176		/* insert into empty list */
177		RF_ASSERT(*head == NULL);
178		RF_ASSERT(*tail == NULL);
179		*head = data;
180		*tail = data;
181	}
182	RF_ASSERT((*head)->prev == NULL);
183	RF_ASSERT((*tail)->next == NULL);
184	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
185}
186
187static RF_ParityLogData_t *
188DequeueParityLogData(
189    RF_Raid_t * raidPtr,
190    RF_ParityLogData_t ** head,
191    RF_ParityLogData_t ** tail,
192    int ignoreLocks)
193{
194	RF_ParityLogData_t *data;
195
196	/* Remove and return an in-core parity log from the tail of a disk
197	 * queue (*head, *tail). NON-BLOCKING */
198
199	/* remove from tail, preserving FIFO order */
200	if (!ignoreLocks)
201		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
202	data = *tail;
203	if (data) {
204		if (*head == *tail) {
205			/* removing last item from queue */
206			*head = NULL;
207			*tail = NULL;
208		} else {
209			*tail = (*tail)->prev;
210			(*tail)->next = NULL;
211			RF_ASSERT((*head)->prev == NULL);
212			RF_ASSERT((*tail)->next == NULL);
213		}
214		data->next = NULL;
215		data->prev = NULL;
216		if (rf_parityLogDebug)
217			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
218	}
219	if (*head) {
220		RF_ASSERT((*head)->prev == NULL);
221		RF_ASSERT((*tail)->next == NULL);
222	}
223	if (!ignoreLocks)
224		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
225	return (data);
226}
227
228
229static void
230RequeueParityLogData(
231    RF_ParityLogData_t * data,
232    RF_ParityLogData_t ** head,
233    RF_ParityLogData_t ** tail)
234{
235	RF_Raid_t *raidPtr;
236
237	/* Insert an in-core parity log (*data) into the tail of a disk queue
238	 * (*head, *tail). NON-BLOCKING */
239
240	raidPtr = data->common->raidPtr;
241	RF_ASSERT(data);
242	if (rf_parityLogDebug)
243		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
244	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
245	if (*tail) {
246		/* append to tail of list */
247		data->prev = *tail;
248		data->next = NULL;
249		(*tail)->next = data;
250		*tail = data;
251	} else {
252		/* inserting into an empty list */
253		*head = data;
254		*tail = data;
255		(*head)->prev = NULL;
256		(*tail)->next = NULL;
257	}
258	RF_ASSERT((*head)->prev == NULL);
259	RF_ASSERT((*tail)->next == NULL);
260	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
261}
262
263RF_ParityLogData_t *
264rf_CreateParityLogData(
265    RF_ParityRecordType_t operation,
266    RF_PhysDiskAddr_t * pda,
267    caddr_t bufPtr,
268    RF_Raid_t * raidPtr,
269    int (*wakeFunc) (RF_DagNode_t * node, int status),
270    void *wakeArg,
271    RF_AccTraceEntry_t * tracerec,
272    RF_Etimer_t startTime)
273{
274	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
275	RF_CommonLogData_t *common;
276	RF_PhysDiskAddr_t *diskAddress;
277	int     boundary, offset = 0;
278
279	/* Return an initialized struct of info to be logged. Build one item
280	 * per physical disk address, one item per region.
281	 *
282	 * NON-BLOCKING */
283
284	diskAddress = pda;
285	common = AllocParityLogCommonData(raidPtr);
286	RF_ASSERT(common);
287
288	common->operation = operation;
289	common->bufPtr = bufPtr;
290	common->raidPtr = raidPtr;
291	common->wakeFunc = wakeFunc;
292	common->wakeArg = wakeArg;
293	common->tracerec = tracerec;
294	common->startTime = startTime;
295	common->cnt = 0;
296
297	if (rf_parityLogDebug)
298		printf("[entering CreateParityLogData]\n");
299	while (diskAddress) {
300		common->cnt++;
301		data = AllocParityLogData(raidPtr);
302		RF_ASSERT(data);
303		data->common = common;
304		data->next = NULL;
305		data->prev = NULL;
306		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
307		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
308			/* disk address does not cross a region boundary */
309			data->diskAddress = *diskAddress;
310			data->bufOffset = offset;
311			offset = offset + diskAddress->numSector;
312			EnqueueParityLogData(data, &resultHead, &resultTail);
313			/* adjust disk address */
314			diskAddress = diskAddress->next;
315		} else {
316			/* disk address crosses a region boundary */
317			/* find address where region is crossed */
318			boundary = 0;
319			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
320				boundary++;
321
322			/* enter data before the boundary */
323			data->diskAddress = *diskAddress;
324			data->diskAddress.numSector = boundary;
325			data->bufOffset = offset;
326			offset += boundary;
327			EnqueueParityLogData(data, &resultHead, &resultTail);
328			/* adjust disk address */
329			diskAddress->startSector += boundary;
330			diskAddress->numSector -= boundary;
331		}
332	}
333	if (rf_parityLogDebug)
334		printf("[leaving CreateParityLogData]\n");
335	return (resultHead);
336}
337
338
339RF_ParityLogData_t *
340rf_SearchAndDequeueParityLogData(
341    RF_Raid_t * raidPtr,
342    int regionID,
343    RF_ParityLogData_t ** head,
344    RF_ParityLogData_t ** tail,
345    int ignoreLocks)
346{
347	RF_ParityLogData_t *w;
348
349	/* Remove and return an in-core parity log from a specified region
350	 * (regionID). If a matching log is not found, return NULL.
351	 *
352	 * NON-BLOCKING. */
353
354	/* walk backward through a list, looking for an entry with a matching
355	 * region ID */
356	if (!ignoreLocks)
357		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
358	w = (*tail);
359	while (w) {
360		if (w->regionID == regionID) {
361			/* remove an element from the list */
362			if (w == *tail) {
363				if (*head == *tail) {
364					/* removing only element in the list */
365					*head = NULL;
366					*tail = NULL;
367				} else {
368					/* removing last item in the list */
369					*tail = (*tail)->prev;
370					(*tail)->next = NULL;
371					RF_ASSERT((*head)->prev == NULL);
372					RF_ASSERT((*tail)->next == NULL);
373				}
374			} else {
375				if (w == *head) {
376					/* removing first item in the list */
377					*head = (*head)->next;
378					(*head)->prev = NULL;
379					RF_ASSERT((*head)->prev == NULL);
380					RF_ASSERT((*tail)->next == NULL);
381				} else {
382					/* removing an item from the middle of
383					 * the list */
384					w->prev->next = w->next;
385					w->next->prev = w->prev;
386					RF_ASSERT((*head)->prev == NULL);
387					RF_ASSERT((*tail)->next == NULL);
388				}
389			}
390			w->prev = NULL;
391			w->next = NULL;
392			if (rf_parityLogDebug)
393				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
394			return (w);
395		} else
396			w = w->prev;
397	}
398	if (!ignoreLocks)
399		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
400	return (NULL);
401}
402
403static RF_ParityLogData_t *
404DequeueMatchingLogData(
405    RF_Raid_t * raidPtr,
406    RF_ParityLogData_t ** head,
407    RF_ParityLogData_t ** tail)
408{
409	RF_ParityLogData_t *logDataList, *logData;
410	int     regionID;
411
412	/* Remove and return an in-core parity log from the tail of a disk
413	 * queue (*head, *tail).  Then remove all matching (identical
414	 * regionIDs) logData and return as a linked list.
415	 *
416	 * NON-BLOCKING */
417
418	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
419	if (logDataList) {
420		regionID = logDataList->regionID;
421		logData = logDataList;
422		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
423		while (logData->next) {
424			logData = logData->next;
425			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
426		}
427	}
428	return (logDataList);
429}
430
431
432static RF_ParityLog_t *
433AcquireParityLog(
434    RF_ParityLogData_t * logData,
435    int finish)
436{
437	RF_ParityLog_t *log = NULL;
438	RF_Raid_t *raidPtr;
439
440	/* Grab a log buffer from the pool and return it. If no buffers are
441	 * available, return NULL. NON-BLOCKING */
442	raidPtr = logData->common->raidPtr;
443	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
444	if (raidPtr->parityLogPool.parityLogs) {
445		log = raidPtr->parityLogPool.parityLogs;
446		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
447		log->regionID = logData->regionID;
448		log->numRecords = 0;
449		log->next = NULL;
450		raidPtr->logsInUse++;
451		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
452	} else {
453		/* no logs available, so place ourselves on the queue of work
454		 * waiting on log buffers this is done while
455		 * parityLogPool.mutex is held, to ensure synchronization with
456		 * ReleaseParityLogs. */
457		if (rf_parityLogDebug)
458			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
459		if (finish)
460			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
461		else
462			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
463	}
464	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
465	return (log);
466}
467
468void
469rf_ReleaseParityLogs(
470    RF_Raid_t * raidPtr,
471    RF_ParityLog_t * firstLog)
472{
473	RF_ParityLogData_t *logDataList;
474	RF_ParityLog_t *log, *lastLog;
475	int     cnt;
476
477	/* Insert a linked list of parity logs (firstLog) to the free list
478	 * (parityLogPool.parityLogPool)
479	 *
480	 * NON-BLOCKING. */
481
482	RF_ASSERT(firstLog);
483
484	/* Before returning logs to global free list, service all requests
485	 * which are blocked on logs.  Holding mutexes for parityLogPool and
486	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
487	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
488	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
489	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
490	log = firstLog;
491	if (firstLog)
492		firstLog = firstLog->next;
493	log->numRecords = 0;
494	log->next = NULL;
495	while (logDataList && log) {
496		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
497		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
498		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
499		if (rf_parityLogDebug)
500			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
501		if (log == NULL) {
502			log = firstLog;
503			if (firstLog) {
504				firstLog = firstLog->next;
505				log->numRecords = 0;
506				log->next = NULL;
507			}
508		}
509		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
510		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
511		if (log)
512			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
513	}
514	/* return remaining logs to pool */
515	if (log) {
516		log->next = firstLog;
517		firstLog = log;
518	}
519	if (firstLog) {
520		lastLog = firstLog;
521		raidPtr->logsInUse--;
522		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
523		while (lastLog->next) {
524			lastLog = lastLog->next;
525			raidPtr->logsInUse--;
526			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
527		}
528		lastLog->next = raidPtr->parityLogPool.parityLogs;
529		raidPtr->parityLogPool.parityLogs = firstLog;
530		cnt = 0;
531		log = raidPtr->parityLogPool.parityLogs;
532		while (log) {
533			cnt++;
534			log = log->next;
535		}
536		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
537	}
538	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
539	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
540}
541
542static void
543ReintLog(
544    RF_Raid_t * raidPtr,
545    int regionID,
546    RF_ParityLog_t * log)
547{
548	RF_ASSERT(log);
549
550	/* Insert an in-core parity log (log) into the disk queue of
551	 * reintegration work.  Set the flag (reintInProgress) for the
552	 * specified region (regionID) to indicate that reintegration is in
553	 * progress for this region. NON-BLOCKING */
554
555	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
556	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
557									 * complete */
558
559	if (rf_parityLogDebug)
560		printf("[requesting reintegration of region %d]\n", log->regionID);
561	/* move record to reintegration queue */
562	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
563	log->next = raidPtr->parityLogDiskQueue.reintQueue;
564	raidPtr->parityLogDiskQueue.reintQueue = log;
565	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
566	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
567	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
568}
569
570static void
571FlushLog(
572    RF_Raid_t * raidPtr,
573    RF_ParityLog_t * log)
574{
575	/* insert a core log (log) into a list of logs
576	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
577	 * NON-BLOCKING */
578
579	RF_ASSERT(log);
580	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
581	RF_ASSERT(log->next == NULL);
582	/* move log to flush queue */
583	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
584	log->next = raidPtr->parityLogDiskQueue.flushQueue;
585	raidPtr->parityLogDiskQueue.flushQueue = log;
586	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
587	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
588}
589
590static int
591DumpParityLogToDisk(
592    int finish,
593    RF_ParityLogData_t * logData)
594{
595	int     i, diskCount, regionID = logData->regionID;
596	RF_ParityLog_t *log;
597	RF_Raid_t *raidPtr;
598
599	raidPtr = logData->common->raidPtr;
600
601	/* Move a core log to disk.  If the log disk is full, initiate
602	 * reintegration.
603	 *
604	 * Return (0) if we can enqueue the dump immediately, otherwise return
605	 * (1) to indicate we are blocked on reintegration and control of the
606	 * thread should be relinquished.
607	 *
608	 * Caller must hold regionInfo[regionID].mutex
609	 *
610	 * NON-BLOCKING */
611
612	if (rf_parityLogDebug)
613		printf("[dumping parity log to disk, region %d]\n", regionID);
614	log = raidPtr->regionInfo[regionID].coreLog;
615	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
616	RF_ASSERT(log->next == NULL);
617
618	/* if reintegration is in progress, must queue work */
619	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
620	if (raidPtr->regionInfo[regionID].reintInProgress) {
621		/* Can not proceed since this region is currently being
622		 * reintegrated. We can not block, so queue remaining work and
623		 * return */
624		if (rf_parityLogDebug)
625			printf("[region %d waiting on reintegration]\n", regionID);
626		/* XXX not sure about the use of finish - shouldn't this
627		 * always be "Enqueue"? */
628		if (finish)
629			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630		else
631			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
632		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
633		return (1);	/* relenquish control of this thread */
634	}
635	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
636	raidPtr->regionInfo[regionID].coreLog = NULL;
637	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
638		/* IMPORTANT!! this loop bound assumes region disk holds an
639		 * integral number of core logs */
640	{
641		/* update disk map for this region */
642		diskCount = raidPtr->regionInfo[regionID].diskCount;
643		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
644			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
645			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
646		}
647		log->diskOffset = diskCount;
648		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
649		FlushLog(raidPtr, log);
650	} else {
651		/* no room for log on disk, send it to disk manager and
652		 * request reintegration */
653		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
654		ReintLog(raidPtr, regionID, log);
655	}
656	if (rf_parityLogDebug)
657		printf("[finished dumping parity log to disk, region %d]\n", regionID);
658	return (0);
659}
660
661int
662rf_ParityLogAppend(
663    RF_ParityLogData_t * logData,
664    int finish,
665    RF_ParityLog_t ** incomingLog,
666    int clearReintFlag)
667{
668	int     regionID, logItem, itemDone;
669	RF_ParityLogData_t *item;
670	int     punt, done = RF_FALSE;
671	RF_ParityLog_t *log;
672	RF_Raid_t *raidPtr;
673	RF_Etimer_t timer;
674	int     (*wakeFunc) (RF_DagNode_t * node, int status);
675	void   *wakeArg;
676
677	/* Add parity to the appropriate log, one sector at a time. This
678	 * routine is called is called by dag functions ParityLogUpdateFunc
679	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
680	 *
681	 * Parity to be logged is contained in a linked-list (logData).  When
682	 * this routine returns, every sector in the list will be in one of
683	 * three places: 1) entered into the parity log 2) queued, waiting on
684	 * reintegration 3) queued, waiting on a core log
685	 *
686	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
687	 * Later, as conditions which required the block are removed, the work
688	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
689	 *
690	 * NON-BLOCKING */
691
692	raidPtr = logData->common->raidPtr;
693	/* lock the region for the first item in logData */
694	RF_ASSERT(logData != NULL);
695	regionID = logData->regionID;
696	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
697	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
698
699	if (clearReintFlag) {
700		/* Enable flushing for this region.  Holding both locks
701		 * provides a synchronization barrier with DumpParityLogToDisk */
702		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
703		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
704		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
705		raidPtr->regionInfo[regionID].diskCount = 0;
706		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
707		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
708										 * enabled */
709		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
710	}
711	/* process each item in logData */
712	while (logData) {
713		/* remove an item from logData */
714		item = logData;
715		logData = logData->next;
716		item->next = NULL;
717		item->prev = NULL;
718
719		if (rf_parityLogDebug)
720			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
721
722		/* see if we moved to a new region */
723		if (regionID != item->regionID) {
724			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725			regionID = item->regionID;
726			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
727			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
728		}
729		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
730				 * can happen in one of two ways: 1) no core
731				 * log (AcquireParityLog) 2) waiting on
732				 * reintegration (DumpParityLogToDisk) If punt
733				 * is RF_TRUE, the dataItem was queued, so
734				 * skip to next item. */
735
736		/* process item, one sector at a time, until all sectors
737		 * processed or we punt */
738		if (item->diskAddress.numSector > 0)
739			done = RF_FALSE;
740		else
741			RF_ASSERT(0);
742		while (!punt && !done) {
743			/* verify that a core log exists for this region */
744			if (!raidPtr->regionInfo[regionID].coreLog) {
745				/* Attempt to acquire a parity log. If
746				 * acquisition fails, queue remaining work in
747				 * data item and move to nextItem. */
748				if (incomingLog)
749					if (*incomingLog) {
750						RF_ASSERT((*incomingLog)->next == NULL);
751						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
752						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
753						*incomingLog = NULL;
754					} else
755						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756				else
757					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
758				/* Note: AcquireParityLog either returns a log
759				 * or enqueues currentItem */
760			}
761			if (!raidPtr->regionInfo[regionID].coreLog)
762				punt = RF_TRUE;	/* failed to find a core log */
763			else {
764				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
765				/* verify that the log has room for new
766				 * entries */
767				/* if log is full, dump it to disk and grab a
768				 * new log */
769				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
770					/* log is full, dump it to disk */
771					if (DumpParityLogToDisk(finish, item))
772						punt = RF_TRUE;	/* dump unsuccessful,
773								 * blocked on
774								 * reintegration */
775					else {
776						/* dump was successful */
777						if (incomingLog)
778							if (*incomingLog) {
779								RF_ASSERT((*incomingLog)->next == NULL);
780								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
781								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
782								*incomingLog = NULL;
783							} else
784								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785						else
786							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
787						/* if a core log is not
788						 * available, must queue work
789						 * and return */
790						if (!raidPtr->regionInfo[regionID].coreLog)
791							punt = RF_TRUE;	/* blocked on log
792									 * availability */
793					}
794				}
795			}
796			/* if we didn't punt on this item, attempt to add a
797			 * sector to the core log */
798			if (!punt) {
799				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
800				/* at this point, we have a core log with
801				 * enough room for a sector */
802				/* copy a sector into the log */
803				log = raidPtr->regionInfo[regionID].coreLog;
804				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
805				logItem = log->numRecords++;
806				log->records[logItem].parityAddr = item->diskAddress;
807				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
808				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
809				log->records[logItem].parityAddr.numSector = 1;
810				log->records[logItem].operation = item->common->operation;
811				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
812				item->diskAddress.numSector--;
813				item->diskAddress.startSector++;
814				if (item->diskAddress.numSector == 0)
815					done = RF_TRUE;
816			}
817		}
818
819		if (!punt) {
820			/* Processed this item completely, decrement count of
821			 * items to be processed. */
822			RF_ASSERT(item->diskAddress.numSector == 0);
823			RF_LOCK_MUTEX(item->common->mutex);
824			item->common->cnt--;
825			if (item->common->cnt == 0)
826				itemDone = RF_TRUE;
827			else
828				itemDone = RF_FALSE;
829			RF_UNLOCK_MUTEX(item->common->mutex);
830			if (itemDone) {
831				/* Finished processing all log data for this
832				 * IO Return structs to free list and invoke
833				 * wakeup function. */
834				timer = item->common->startTime;	/* grab initial value of
835									 * timer */
836				RF_ETIMER_STOP(timer);
837				RF_ETIMER_EVAL(timer);
838				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
839				if (rf_parityLogDebug)
840					printf("[waking process for region %d]\n", item->regionID);
841				wakeFunc = item->common->wakeFunc;
842				wakeArg = item->common->wakeArg;
843				FreeParityLogCommonData(item->common);
844				FreeParityLogData(item);
845				(wakeFunc) (wakeArg, 0);
846			} else
847				FreeParityLogData(item);
848		}
849	}
850	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
851	if (rf_parityLogDebug)
852		printf("[exiting ParityLogAppend]\n");
853	return (0);
854}
855
856
857void
858rf_EnableParityLogging(RF_Raid_t * raidPtr)
859{
860	int     regionID;
861
862	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
863		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
865		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
866	}
867	if (rf_parityLogDebug)
868		printf("[parity logging enabled]\n");
869}
870#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
871