rf_paritylog.c revision 1.16
1/*	$NetBSD: rf_paritylog.c,v 1.16 2011/05/11 03:38:32 mrg Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/* Code for manipulating in-core parity logs
30 *
31 */
32
33#include <sys/cdefs.h>
34__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.16 2011/05/11 03:38:32 mrg Exp $");
35
36#include "rf_archs.h"
37
38#if RF_INCLUDE_PARITYLOGGING > 0
39
40/*
41 * Append-only log for recording parity "update" and "overwrite" records
42 */
43
44#include <dev/raidframe/raidframevar.h>
45
46#include "rf_threadstuff.h"
47#include "rf_mcpair.h"
48#include "rf_raid.h"
49#include "rf_dag.h"
50#include "rf_dagfuncs.h"
51#include "rf_desc.h"
52#include "rf_layout.h"
53#include "rf_diskqueue.h"
54#include "rf_etimer.h"
55#include "rf_paritylog.h"
56#include "rf_general.h"
57#include "rf_map.h"
58#include "rf_paritylogging.h"
59#include "rf_paritylogDiskMgr.h"
60
61static RF_CommonLogData_t *
62AllocParityLogCommonData(RF_Raid_t * raidPtr)
63{
64	RF_CommonLogData_t *common = NULL;
65
66	/* Return a struct for holding common parity log information from the
67	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
68	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
69
70	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
71	if (raidPtr->parityLogDiskQueue.freeCommonList) {
72		common = raidPtr->parityLogDiskQueue.freeCommonList;
73		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
74		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
75	} else {
76		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
77		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
78		/* destroy is in rf_paritylogging.c */
79		rf_init_mutex2(common->mutex, IPL_VM);
80	}
81	common->next = NULL;
82	return (common);
83}
84
85static void
86FreeParityLogCommonData(RF_CommonLogData_t * common)
87{
88	RF_Raid_t *raidPtr;
89
90	/* Insert a single struct for holding parity log information (data)
91	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
92	 * NON-BLOCKING */
93
94	raidPtr = common->raidPtr;
95	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
96	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
97	raidPtr->parityLogDiskQueue.freeCommonList = common;
98	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
99}
100
101static RF_ParityLogData_t *
102AllocParityLogData(RF_Raid_t * raidPtr)
103{
104	RF_ParityLogData_t *data = NULL;
105
106	/* Return a struct for holding parity log information from the free
107	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
108	 * call RF_Malloc to create a new structure. NON-BLOCKING */
109
110	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
111	if (raidPtr->parityLogDiskQueue.freeDataList) {
112		data = raidPtr->parityLogDiskQueue.freeDataList;
113		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
114		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
115	} else {
116		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
117		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
118	}
119	data->next = NULL;
120	data->prev = NULL;
121	return (data);
122}
123
124
125static void
126FreeParityLogData(RF_ParityLogData_t * data)
127{
128	RF_ParityLogData_t *nextItem;
129	RF_Raid_t *raidPtr;
130
131	/* Insert a linked list of structs for holding parity log information
132	 * (data) into the free list (parityLogDiskQueue.freeList).
133	 * NON-BLOCKING */
134
135	raidPtr = data->common->raidPtr;
136	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
137	while (data) {
138		nextItem = data->next;
139		data->next = raidPtr->parityLogDiskQueue.freeDataList;
140		raidPtr->parityLogDiskQueue.freeDataList = data;
141		data = nextItem;
142	}
143	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
144}
145
146
147static void
148EnqueueParityLogData(
149    RF_ParityLogData_t * data,
150    RF_ParityLogData_t ** head,
151    RF_ParityLogData_t ** tail)
152{
153	RF_Raid_t *raidPtr;
154
155	/* Insert an in-core parity log (*data) into the head of a disk queue
156	 * (*head, *tail). NON-BLOCKING */
157
158	raidPtr = data->common->raidPtr;
159	if (rf_parityLogDebug)
160		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
161	RF_ASSERT(data->prev == NULL);
162	RF_ASSERT(data->next == NULL);
163	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
164	if (*head) {
165		/* insert into head of queue */
166		RF_ASSERT((*head)->prev == NULL);
167		RF_ASSERT((*tail)->next == NULL);
168		data->next = *head;
169		(*head)->prev = data;
170		*head = data;
171	} else {
172		/* insert into empty list */
173		RF_ASSERT(*head == NULL);
174		RF_ASSERT(*tail == NULL);
175		*head = data;
176		*tail = data;
177	}
178	RF_ASSERT((*head)->prev == NULL);
179	RF_ASSERT((*tail)->next == NULL);
180	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
181}
182
183static RF_ParityLogData_t *
184DequeueParityLogData(
185    RF_Raid_t * raidPtr,
186    RF_ParityLogData_t ** head,
187    RF_ParityLogData_t ** tail,
188    int ignoreLocks)
189{
190	RF_ParityLogData_t *data;
191
192	/* Remove and return an in-core parity log from the tail of a disk
193	 * queue (*head, *tail). NON-BLOCKING */
194
195	/* remove from tail, preserving FIFO order */
196	if (!ignoreLocks)
197		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
198	data = *tail;
199	if (data) {
200		if (*head == *tail) {
201			/* removing last item from queue */
202			*head = NULL;
203			*tail = NULL;
204		} else {
205			*tail = (*tail)->prev;
206			(*tail)->next = NULL;
207			RF_ASSERT((*head)->prev == NULL);
208			RF_ASSERT((*tail)->next == NULL);
209		}
210		data->next = NULL;
211		data->prev = NULL;
212		if (rf_parityLogDebug)
213			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
214	}
215	if (*head) {
216		RF_ASSERT((*head)->prev == NULL);
217		RF_ASSERT((*tail)->next == NULL);
218	}
219	if (!ignoreLocks)
220		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
221	return (data);
222}
223
224
225static void
226RequeueParityLogData(
227    RF_ParityLogData_t * data,
228    RF_ParityLogData_t ** head,
229    RF_ParityLogData_t ** tail)
230{
231	RF_Raid_t *raidPtr;
232
233	/* Insert an in-core parity log (*data) into the tail of a disk queue
234	 * (*head, *tail). NON-BLOCKING */
235
236	raidPtr = data->common->raidPtr;
237	RF_ASSERT(data);
238	if (rf_parityLogDebug)
239		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
240	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
241	if (*tail) {
242		/* append to tail of list */
243		data->prev = *tail;
244		data->next = NULL;
245		(*tail)->next = data;
246		*tail = data;
247	} else {
248		/* inserting into an empty list */
249		*head = data;
250		*tail = data;
251		(*head)->prev = NULL;
252		(*tail)->next = NULL;
253	}
254	RF_ASSERT((*head)->prev == NULL);
255	RF_ASSERT((*tail)->next == NULL);
256	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
257}
258
259RF_ParityLogData_t *
260rf_CreateParityLogData(
261    RF_ParityRecordType_t operation,
262    RF_PhysDiskAddr_t * pda,
263    void *bufPtr,
264    RF_Raid_t * raidPtr,
265    int (*wakeFunc) (RF_DagNode_t * node, int status),
266    void *wakeArg,
267    RF_AccTraceEntry_t * tracerec,
268    RF_Etimer_t startTime)
269{
270	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
271	RF_CommonLogData_t *common;
272	RF_PhysDiskAddr_t *diskAddress;
273	int     boundary, offset = 0;
274
275	/* Return an initialized struct of info to be logged. Build one item
276	 * per physical disk address, one item per region.
277	 *
278	 * NON-BLOCKING */
279
280	diskAddress = pda;
281	common = AllocParityLogCommonData(raidPtr);
282	RF_ASSERT(common);
283
284	common->operation = operation;
285	common->bufPtr = bufPtr;
286	common->raidPtr = raidPtr;
287	common->wakeFunc = wakeFunc;
288	common->wakeArg = wakeArg;
289	common->tracerec = tracerec;
290	common->startTime = startTime;
291	common->cnt = 0;
292
293	if (rf_parityLogDebug)
294		printf("[entering CreateParityLogData]\n");
295	while (diskAddress) {
296		common->cnt++;
297		data = AllocParityLogData(raidPtr);
298		RF_ASSERT(data);
299		data->common = common;
300		data->next = NULL;
301		data->prev = NULL;
302		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
303		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
304			/* disk address does not cross a region boundary */
305			data->diskAddress = *diskAddress;
306			data->bufOffset = offset;
307			offset = offset + diskAddress->numSector;
308			EnqueueParityLogData(data, &resultHead, &resultTail);
309			/* adjust disk address */
310			diskAddress = diskAddress->next;
311		} else {
312			/* disk address crosses a region boundary */
313			/* find address where region is crossed */
314			boundary = 0;
315			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
316				boundary++;
317
318			/* enter data before the boundary */
319			data->diskAddress = *diskAddress;
320			data->diskAddress.numSector = boundary;
321			data->bufOffset = offset;
322			offset += boundary;
323			EnqueueParityLogData(data, &resultHead, &resultTail);
324			/* adjust disk address */
325			diskAddress->startSector += boundary;
326			diskAddress->numSector -= boundary;
327		}
328	}
329	if (rf_parityLogDebug)
330		printf("[leaving CreateParityLogData]\n");
331	return (resultHead);
332}
333
334
335RF_ParityLogData_t *
336rf_SearchAndDequeueParityLogData(
337    RF_Raid_t * raidPtr,
338    int regionID,
339    RF_ParityLogData_t ** head,
340    RF_ParityLogData_t ** tail,
341    int ignoreLocks)
342{
343	RF_ParityLogData_t *w;
344
345	/* Remove and return an in-core parity log from a specified region
346	 * (regionID). If a matching log is not found, return NULL.
347	 *
348	 * NON-BLOCKING. */
349
350	/* walk backward through a list, looking for an entry with a matching
351	 * region ID */
352	if (!ignoreLocks)
353		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
354	w = (*tail);
355	while (w) {
356		if (w->regionID == regionID) {
357			/* remove an element from the list */
358			if (w == *tail) {
359				if (*head == *tail) {
360					/* removing only element in the list */
361					*head = NULL;
362					*tail = NULL;
363				} else {
364					/* removing last item in the list */
365					*tail = (*tail)->prev;
366					(*tail)->next = NULL;
367					RF_ASSERT((*head)->prev == NULL);
368					RF_ASSERT((*tail)->next == NULL);
369				}
370			} else {
371				if (w == *head) {
372					/* removing first item in the list */
373					*head = (*head)->next;
374					(*head)->prev = NULL;
375					RF_ASSERT((*head)->prev == NULL);
376					RF_ASSERT((*tail)->next == NULL);
377				} else {
378					/* removing an item from the middle of
379					 * the list */
380					w->prev->next = w->next;
381					w->next->prev = w->prev;
382					RF_ASSERT((*head)->prev == NULL);
383					RF_ASSERT((*tail)->next == NULL);
384				}
385			}
386			w->prev = NULL;
387			w->next = NULL;
388			if (rf_parityLogDebug)
389				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
390			return (w);
391		} else
392			w = w->prev;
393	}
394	if (!ignoreLocks)
395		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
396	return (NULL);
397}
398
399static RF_ParityLogData_t *
400DequeueMatchingLogData(
401    RF_Raid_t * raidPtr,
402    RF_ParityLogData_t ** head,
403    RF_ParityLogData_t ** tail)
404{
405	RF_ParityLogData_t *logDataList, *logData;
406	int     regionID;
407
408	/* Remove and return an in-core parity log from the tail of a disk
409	 * queue (*head, *tail).  Then remove all matching (identical
410	 * regionIDs) logData and return as a linked list.
411	 *
412	 * NON-BLOCKING */
413
414	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
415	if (logDataList) {
416		regionID = logDataList->regionID;
417		logData = logDataList;
418		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
419		while (logData->next) {
420			logData = logData->next;
421			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
422		}
423	}
424	return (logDataList);
425}
426
427
428static RF_ParityLog_t *
429AcquireParityLog(
430    RF_ParityLogData_t * logData,
431    int finish)
432{
433	RF_ParityLog_t *log = NULL;
434	RF_Raid_t *raidPtr;
435
436	/* Grab a log buffer from the pool and return it. If no buffers are
437	 * available, return NULL. NON-BLOCKING */
438	raidPtr = logData->common->raidPtr;
439	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
440	if (raidPtr->parityLogPool.parityLogs) {
441		log = raidPtr->parityLogPool.parityLogs;
442		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
443		log->regionID = logData->regionID;
444		log->numRecords = 0;
445		log->next = NULL;
446		raidPtr->logsInUse++;
447		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
448	} else {
449		/* no logs available, so place ourselves on the queue of work
450		 * waiting on log buffers this is done while
451		 * parityLogPool.mutex is held, to ensure synchronization with
452		 * ReleaseParityLogs. */
453		if (rf_parityLogDebug)
454			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
455		if (finish)
456			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
457		else
458			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
459	}
460	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
461	return (log);
462}
463
464void
465rf_ReleaseParityLogs(
466    RF_Raid_t * raidPtr,
467    RF_ParityLog_t * firstLog)
468{
469	RF_ParityLogData_t *logDataList;
470	RF_ParityLog_t *log, *lastLog;
471	int     cnt;
472
473	/* Insert a linked list of parity logs (firstLog) to the free list
474	 * (parityLogPool.parityLogPool)
475	 *
476	 * NON-BLOCKING. */
477
478	RF_ASSERT(firstLog);
479
480	/* Before returning logs to global free list, service all requests
481	 * which are blocked on logs.  Holding mutexes for parityLogPool and
482	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
483	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
484	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
485	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
486	log = firstLog;
487	if (firstLog)
488		firstLog = firstLog->next;
489	log->numRecords = 0;
490	log->next = NULL;
491	while (logDataList && log) {
492		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
493		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
494		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
495		if (rf_parityLogDebug)
496			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
497		if (log == NULL) {
498			log = firstLog;
499			if (firstLog) {
500				firstLog = firstLog->next;
501				log->numRecords = 0;
502				log->next = NULL;
503			}
504		}
505		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
506		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
507		if (log)
508			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
509	}
510	/* return remaining logs to pool */
511	if (log) {
512		log->next = firstLog;
513		firstLog = log;
514	}
515	if (firstLog) {
516		lastLog = firstLog;
517		raidPtr->logsInUse--;
518		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
519		while (lastLog->next) {
520			lastLog = lastLog->next;
521			raidPtr->logsInUse--;
522			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
523		}
524		lastLog->next = raidPtr->parityLogPool.parityLogs;
525		raidPtr->parityLogPool.parityLogs = firstLog;
526		cnt = 0;
527		log = raidPtr->parityLogPool.parityLogs;
528		while (log) {
529			cnt++;
530			log = log->next;
531		}
532		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
533	}
534	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
535	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
536}
537
538static void
539ReintLog(
540    RF_Raid_t * raidPtr,
541    int regionID,
542    RF_ParityLog_t * log)
543{
544	RF_ASSERT(log);
545
546	/* Insert an in-core parity log (log) into the disk queue of
547	 * reintegration work.  Set the flag (reintInProgress) for the
548	 * specified region (regionID) to indicate that reintegration is in
549	 * progress for this region. NON-BLOCKING */
550
551	rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
552	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
553									 * complete */
554
555	if (rf_parityLogDebug)
556		printf("[requesting reintegration of region %d]\n", log->regionID);
557	/* move record to reintegration queue */
558	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
559	log->next = raidPtr->parityLogDiskQueue.reintQueue;
560	raidPtr->parityLogDiskQueue.reintQueue = log;
561	rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
562	rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
563	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
564}
565
566static void
567FlushLog(
568    RF_Raid_t * raidPtr,
569    RF_ParityLog_t * log)
570{
571	/* insert a core log (log) into a list of logs
572	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
573	 * NON-BLOCKING */
574
575	RF_ASSERT(log);
576	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
577	RF_ASSERT(log->next == NULL);
578	/* move log to flush queue */
579	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
580	log->next = raidPtr->parityLogDiskQueue.flushQueue;
581	raidPtr->parityLogDiskQueue.flushQueue = log;
582	rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
583	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
584}
585
586static int
587DumpParityLogToDisk(
588    int finish,
589    RF_ParityLogData_t * logData)
590{
591	int     i, diskCount, regionID = logData->regionID;
592	RF_ParityLog_t *log;
593	RF_Raid_t *raidPtr;
594
595	raidPtr = logData->common->raidPtr;
596
597	/* Move a core log to disk.  If the log disk is full, initiate
598	 * reintegration.
599	 *
600	 * Return (0) if we can enqueue the dump immediately, otherwise return
601	 * (1) to indicate we are blocked on reintegration and control of the
602	 * thread should be relinquished.
603	 *
604	 * Caller must hold regionInfo[regionID].mutex
605	 *
606	 * NON-BLOCKING */
607
608	if (rf_parityLogDebug)
609		printf("[dumping parity log to disk, region %d]\n", regionID);
610	log = raidPtr->regionInfo[regionID].coreLog;
611	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
612	RF_ASSERT(log->next == NULL);
613
614	/* if reintegration is in progress, must queue work */
615	rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
616	if (raidPtr->regionInfo[regionID].reintInProgress) {
617		/* Can not proceed since this region is currently being
618		 * reintegrated. We can not block, so queue remaining work and
619		 * return */
620		if (rf_parityLogDebug)
621			printf("[region %d waiting on reintegration]\n", regionID);
622		/* XXX not sure about the use of finish - shouldn't this
623		 * always be "Enqueue"? */
624		if (finish)
625			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
626		else
627			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
628		rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
629		return (1);	/* relenquish control of this thread */
630	}
631	rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
632	raidPtr->regionInfo[regionID].coreLog = NULL;
633	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
634		/* IMPORTANT!! this loop bound assumes region disk holds an
635		 * integral number of core logs */
636	{
637		/* update disk map for this region */
638		diskCount = raidPtr->regionInfo[regionID].diskCount;
639		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
640			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
641			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
642		}
643		log->diskOffset = diskCount;
644		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
645		FlushLog(raidPtr, log);
646	} else {
647		/* no room for log on disk, send it to disk manager and
648		 * request reintegration */
649		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
650		ReintLog(raidPtr, regionID, log);
651	}
652	if (rf_parityLogDebug)
653		printf("[finished dumping parity log to disk, region %d]\n", regionID);
654	return (0);
655}
656
657int
658rf_ParityLogAppend(
659    RF_ParityLogData_t * logData,
660    int finish,
661    RF_ParityLog_t ** incomingLog,
662    int clearReintFlag)
663{
664	int     regionID, logItem, itemDone;
665	RF_ParityLogData_t *item;
666	int     punt, done = RF_FALSE;
667	RF_ParityLog_t *log;
668	RF_Raid_t *raidPtr;
669	RF_Etimer_t timer;
670	int     (*wakeFunc) (RF_DagNode_t * node, int status);
671	void   *wakeArg;
672
673	/* Add parity to the appropriate log, one sector at a time. This
674	 * routine is called is called by dag functions ParityLogUpdateFunc
675	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
676	 *
677	 * Parity to be logged is contained in a linked-list (logData).  When
678	 * this routine returns, every sector in the list will be in one of
679	 * three places: 1) entered into the parity log 2) queued, waiting on
680	 * reintegration 3) queued, waiting on a core log
681	 *
682	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
683	 * Later, as conditions which required the block are removed, the work
684	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
685	 *
686	 * NON-BLOCKING */
687
688	raidPtr = logData->common->raidPtr;
689	/* lock the region for the first item in logData */
690	RF_ASSERT(logData != NULL);
691	regionID = logData->regionID;
692	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
693	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
694
695	if (clearReintFlag) {
696		/* Enable flushing for this region.  Holding both locks
697		 * provides a synchronization barrier with DumpParityLogToDisk */
698		rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
699		/* XXXmrg need this? */
700		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
701		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
702		raidPtr->regionInfo[regionID].diskCount = 0;
703		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
704		rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
705										 * enabled */
706		/* XXXmrg need this? */
707		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
708	}
709	/* process each item in logData */
710	while (logData) {
711		/* remove an item from logData */
712		item = logData;
713		logData = logData->next;
714		item->next = NULL;
715		item->prev = NULL;
716
717		if (rf_parityLogDebug)
718			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
719
720		/* see if we moved to a new region */
721		if (regionID != item->regionID) {
722			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
723			regionID = item->regionID;
724			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
726		}
727		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
728				 * can happen in one of two ways: 1) no core
729				 * log (AcquireParityLog) 2) waiting on
730				 * reintegration (DumpParityLogToDisk) If punt
731				 * is RF_TRUE, the dataItem was queued, so
732				 * skip to next item. */
733
734		/* process item, one sector at a time, until all sectors
735		 * processed or we punt */
736		if (item->diskAddress.numSector > 0)
737			done = RF_FALSE;
738		else
739			RF_ASSERT(0);
740		while (!punt && !done) {
741			/* verify that a core log exists for this region */
742			if (!raidPtr->regionInfo[regionID].coreLog) {
743				/* Attempt to acquire a parity log. If
744				 * acquisition fails, queue remaining work in
745				 * data item and move to nextItem. */
746				if (incomingLog)
747					if (*incomingLog) {
748						RF_ASSERT((*incomingLog)->next == NULL);
749						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
750						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
751						*incomingLog = NULL;
752					} else
753						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
754				else
755					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756				/* Note: AcquireParityLog either returns a log
757				 * or enqueues currentItem */
758			}
759			if (!raidPtr->regionInfo[regionID].coreLog)
760				punt = RF_TRUE;	/* failed to find a core log */
761			else {
762				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
763				/* verify that the log has room for new
764				 * entries */
765				/* if log is full, dump it to disk and grab a
766				 * new log */
767				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
768					/* log is full, dump it to disk */
769					if (DumpParityLogToDisk(finish, item))
770						punt = RF_TRUE;	/* dump unsuccessful,
771								 * blocked on
772								 * reintegration */
773					else {
774						/* dump was successful */
775						if (incomingLog)
776							if (*incomingLog) {
777								RF_ASSERT((*incomingLog)->next == NULL);
778								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
779								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
780								*incomingLog = NULL;
781							} else
782								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
783						else
784							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785						/* if a core log is not
786						 * available, must queue work
787						 * and return */
788						if (!raidPtr->regionInfo[regionID].coreLog)
789							punt = RF_TRUE;	/* blocked on log
790									 * availability */
791					}
792				}
793			}
794			/* if we didn't punt on this item, attempt to add a
795			 * sector to the core log */
796			if (!punt) {
797				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
798				/* at this point, we have a core log with
799				 * enough room for a sector */
800				/* copy a sector into the log */
801				log = raidPtr->regionInfo[regionID].coreLog;
802				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
803				logItem = log->numRecords++;
804				log->records[logItem].parityAddr = item->diskAddress;
805				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
806				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
807				log->records[logItem].parityAddr.numSector = 1;
808				log->records[logItem].operation = item->common->operation;
809				memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
810				item->diskAddress.numSector--;
811				item->diskAddress.startSector++;
812				if (item->diskAddress.numSector == 0)
813					done = RF_TRUE;
814			}
815		}
816
817		if (!punt) {
818			/* Processed this item completely, decrement count of
819			 * items to be processed. */
820			RF_ASSERT(item->diskAddress.numSector == 0);
821			rf_lock_mutex2(item->common->mutex);
822			item->common->cnt--;
823			if (item->common->cnt == 0)
824				itemDone = RF_TRUE;
825			else
826				itemDone = RF_FALSE;
827			rf_unlock_mutex2(item->common->mutex);
828			if (itemDone) {
829				/* Finished processing all log data for this
830				 * IO Return structs to free list and invoke
831				 * wakeup function. */
832				timer = item->common->startTime;	/* grab initial value of
833									 * timer */
834				RF_ETIMER_STOP(timer);
835				RF_ETIMER_EVAL(timer);
836				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
837				if (rf_parityLogDebug)
838					printf("[waking process for region %d]\n", item->regionID);
839				wakeFunc = item->common->wakeFunc;
840				wakeArg = item->common->wakeArg;
841				FreeParityLogCommonData(item->common);
842				FreeParityLogData(item);
843				(wakeFunc) (wakeArg, 0);
844			} else
845				FreeParityLogData(item);
846		}
847	}
848	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
849	if (rf_parityLogDebug)
850		printf("[exiting ParityLogAppend]\n");
851	return (0);
852}
853
854
855void
856rf_EnableParityLogging(RF_Raid_t * raidPtr)
857{
858	int     regionID;
859
860	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
861		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
862		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
863		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864	}
865	if (rf_parityLogDebug)
866		printf("[parity logging enabled]\n");
867}
868#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
869