rf_paritylog.c revision 1.2
1/*	$NetBSD: rf_paritylog.c,v 1.2 1999/01/26 02:33:59 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/* Code for manipulating in-core parity logs
30 *
31 */
32
33#include "rf_archs.h"
34
35#if RF_INCLUDE_PARITYLOGGING > 0
36
37/*
38 * Append-only log for recording parity "update" and "overwrite" records
39 */
40
41#include "rf_types.h"
42#include "rf_threadstuff.h"
43#include "rf_mcpair.h"
44#include "rf_raid.h"
45#include "rf_dag.h"
46#include "rf_dagfuncs.h"
47#include "rf_desc.h"
48#include "rf_layout.h"
49#include "rf_diskqueue.h"
50#include "rf_etimer.h"
51#include "rf_paritylog.h"
52#include "rf_general.h"
53#include "rf_threadid.h"
54#include "rf_map.h"
55#include "rf_paritylogging.h"
56#include "rf_paritylogDiskMgr.h"
57#include "rf_sys.h"
58
59static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr)
60{
61  RF_CommonLogData_t *common = NULL;
62  int rc;
63
64  /* Return a struct for holding common parity log information from the free
65     list (rf_parityLogDiskQueue.freeCommonList).  If the free list is empty, call
66     RF_Malloc to create a new structure.
67     NON-BLOCKING */
68
69  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
70  if (raidPtr->parityLogDiskQueue.freeCommonList)
71    {
72      common = raidPtr->parityLogDiskQueue.freeCommonList;
73      raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
74      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
75    }
76  else
77    {
78      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
79      RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
80      rc = rf_mutex_init(&common->mutex);
81      if (rc) {
82        RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
83          __LINE__, rc);
84        RF_Free(common, sizeof(RF_CommonLogData_t));
85        common = NULL;
86      }
87    }
88  common->next = NULL;
89  return(common);
90}
91
92static void FreeParityLogCommonData(RF_CommonLogData_t *common)
93{
94  RF_Raid_t *raidPtr;
95
96  /* Insert a single struct for holding parity log information
97     (data) into the free list (rf_parityLogDiskQueue.freeCommonList).
98     NON-BLOCKING */
99
100  raidPtr = common->raidPtr;
101  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
102  common->next = raidPtr->parityLogDiskQueue.freeCommonList;
103  raidPtr->parityLogDiskQueue.freeCommonList = common;
104  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
105}
106
107static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr)
108{
109  RF_ParityLogData_t *data = NULL;
110
111  /* Return a struct for holding parity log information from the free
112     list (rf_parityLogDiskQueue.freeList).  If the free list is empty, call
113     RF_Malloc to create a new structure.
114     NON-BLOCKING */
115
116  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117  if (raidPtr->parityLogDiskQueue.freeDataList)
118    {
119      data = raidPtr->parityLogDiskQueue.freeDataList;
120      raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
121      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
122    }
123  else
124    {
125      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
126      RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
127    }
128  data->next = NULL;
129  data->prev = NULL;
130  return(data);
131}
132
133
134static void FreeParityLogData(RF_ParityLogData_t *data)
135{
136  RF_ParityLogData_t *nextItem;
137  RF_Raid_t *raidPtr;
138
139  /* Insert a linked list of structs for holding parity log
140     information (data) into the free list (parityLogDiskQueue.freeList).
141     NON-BLOCKING */
142
143  raidPtr = data->common->raidPtr;
144  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
145  while (data)
146    {
147      nextItem = data->next;
148      data->next = raidPtr->parityLogDiskQueue.freeDataList;
149      raidPtr->parityLogDiskQueue.freeDataList = data;
150      data = nextItem;
151    }
152  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
153}
154
155
156static void EnqueueParityLogData(
157  RF_ParityLogData_t   *data,
158  RF_ParityLogData_t  **head,
159  RF_ParityLogData_t  **tail)
160{
161  RF_Raid_t *raidPtr;
162
163  /* Insert an in-core parity log (*data) into the head of
164     a disk queue (*head, *tail).
165     NON-BLOCKING */
166
167  raidPtr = data->common->raidPtr;
168  if (rf_parityLogDebug)
169    printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
170  RF_ASSERT(data->prev == NULL);
171  RF_ASSERT(data->next == NULL);
172  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
173  if (*head)
174    {
175      /* insert into head of queue */
176      RF_ASSERT((*head)->prev == NULL);
177      RF_ASSERT((*tail)->next == NULL);
178      data->next = *head;
179      (*head)->prev = data;
180      *head = data;
181    }
182  else
183    {
184      /* insert into empty list */
185      RF_ASSERT(*head == NULL);
186      RF_ASSERT(*tail == NULL);
187      *head = data;
188      *tail = data;
189    }
190  RF_ASSERT((*head)->prev == NULL);
191  RF_ASSERT((*tail)->next == NULL);
192  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
193}
194
195static RF_ParityLogData_t *DequeueParityLogData(
196  RF_Raid_t            *raidPtr,
197  RF_ParityLogData_t  **head,
198  RF_ParityLogData_t  **tail,
199  int                   ignoreLocks)
200{
201  RF_ParityLogData_t *data;
202
203  /* Remove and return an in-core parity log from the tail of
204     a disk queue (*head, *tail).
205     NON-BLOCKING */
206
207  /* remove from tail, preserving FIFO order */
208  if (!ignoreLocks)
209    RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
210  data = *tail;
211  if (data)
212    {
213      if (*head == *tail)
214	{
215	  /* removing last item from queue */
216	  *head = NULL;
217	  *tail = NULL;
218	}
219      else
220	{
221	  *tail = (*tail)->prev;
222	  (*tail)->next = NULL;
223	  RF_ASSERT((*head)->prev == NULL);
224	  RF_ASSERT((*tail)->next == NULL);
225	}
226      data->next = NULL;
227      data->prev = NULL;
228      if (rf_parityLogDebug)
229	printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
230    }
231  if (*head)
232    {
233      RF_ASSERT((*head)->prev == NULL);
234      RF_ASSERT((*tail)->next == NULL);
235    }
236  if (!ignoreLocks)
237    RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
238  return(data);
239}
240
241
242static void RequeueParityLogData(
243  RF_ParityLogData_t   *data,
244  RF_ParityLogData_t  **head,
245  RF_ParityLogData_t  **tail)
246{
247  RF_Raid_t *raidPtr;
248
249  /* Insert an in-core parity log (*data) into the tail of
250     a disk queue (*head, *tail).
251     NON-BLOCKING */
252
253  raidPtr = data->common->raidPtr;
254  RF_ASSERT(data);
255  if (rf_parityLogDebug)
256    printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
257  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
258  if (*tail)
259    {
260      /* append to tail of list */
261      data->prev = *tail;
262      data->next = NULL;
263      (*tail)->next = data;
264      *tail = data;
265    }
266  else
267    {
268      /* inserting into an empty list */
269      *head = data;
270      *tail = data;
271      (*head)->prev = NULL;
272      (*tail)->next = NULL;
273    }
274  RF_ASSERT((*head)->prev == NULL);
275  RF_ASSERT((*tail)->next == NULL);
276  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
277}
278
279RF_ParityLogData_t *rf_CreateParityLogData(
280  RF_ParityRecordType_t    operation,
281  RF_PhysDiskAddr_t       *pda,
282  caddr_t                  bufPtr,
283  RF_Raid_t               *raidPtr,
284  int                    (*wakeFunc)(RF_DagNode_t *node, int status),
285  void                    *wakeArg,
286  RF_AccTraceEntry_t      *tracerec,
287  RF_Etimer_t              startTime)
288{
289  RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
290  RF_CommonLogData_t *common;
291  RF_PhysDiskAddr_t *diskAddress;
292  int boundary, offset = 0;
293
294  /* Return an initialized struct of info to be logged.
295     Build one item per physical disk address, one item per region.
296
297     NON-BLOCKING */
298
299  diskAddress = pda;
300  common = AllocParityLogCommonData(raidPtr);
301  RF_ASSERT(common);
302
303  common->operation = operation;
304  common->bufPtr = bufPtr;
305  common->raidPtr = raidPtr;
306  common->wakeFunc = wakeFunc;
307  common->wakeArg = wakeArg;
308  common->tracerec = tracerec;
309  common->startTime = startTime;
310  common->cnt = 0;
311
312  if (rf_parityLogDebug)
313    printf("[entering CreateParityLogData]\n");
314  while (diskAddress)
315    {
316      common->cnt++;
317      data = AllocParityLogData(raidPtr);
318      RF_ASSERT(data);
319      data->common = common;
320      data->next = NULL;
321      data->prev = NULL;
322      data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
323      if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1))
324	{
325	  /* disk address does not cross a region boundary */
326	  data->diskAddress = *diskAddress;
327	  data->bufOffset = offset;
328	  offset = offset + diskAddress->numSector;
329	  EnqueueParityLogData(data, &resultHead, &resultTail);
330	  /* adjust disk address */
331	  diskAddress = diskAddress->next;
332	}
333      else
334	{
335	  /* disk address crosses a region boundary */
336	  /* find address where region is crossed */
337	  boundary = 0;
338	  while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
339	    boundary++;
340
341	  /* enter data before the boundary */
342	  data->diskAddress = *diskAddress;
343	  data->diskAddress.numSector = boundary;
344	  data->bufOffset = offset;
345	  offset += boundary;
346	  EnqueueParityLogData(data, &resultHead, &resultTail);
347	  /* adjust disk address */
348	  diskAddress->startSector += boundary;
349	  diskAddress->numSector -= boundary;
350	}
351    }
352  if (rf_parityLogDebug)
353    printf("[leaving CreateParityLogData]\n");
354  return(resultHead);
355}
356
357
358RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(
359  RF_Raid_t            *raidPtr,
360  int                   regionID,
361  RF_ParityLogData_t  **head,
362  RF_ParityLogData_t  **tail,
363  int                   ignoreLocks)
364{
365  RF_ParityLogData_t *w;
366
367  /* Remove and return an in-core parity log from a specified region (regionID).
368     If a matching log is not found, return NULL.
369
370     NON-BLOCKING.
371     */
372
373  /* walk backward through a list, looking for an entry with a matching region ID */
374  if (!ignoreLocks)
375    RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
376  w = (*tail);
377  while (w)
378    {
379      if (w->regionID == regionID)
380	{
381	  /* remove an element from the list */
382	  if (w == *tail)
383	    {
384	      if (*head == *tail)
385		{
386		  /* removing only element in the list */
387		  *head = NULL;
388		  *tail = NULL;
389		}
390	      else
391		{
392		  /* removing last item in the list */
393		  *tail = (*tail)->prev;
394		  (*tail)->next = NULL;
395		  RF_ASSERT((*head)->prev == NULL);
396		  RF_ASSERT((*tail)->next == NULL);
397		}
398	    }
399	  else
400	    {
401	      if (w == *head)
402		{
403		  /* removing first item in the list */
404		  *head = (*head)->next;
405		  (*head)->prev = NULL;
406		  RF_ASSERT((*head)->prev == NULL);
407		  RF_ASSERT((*tail)->next == NULL);
408		}
409	      else
410		{
411		  /* removing an item from the middle of the list */
412		  w->prev->next = w->next;
413		  w->next->prev = w->prev;
414		  RF_ASSERT((*head)->prev == NULL);
415		  RF_ASSERT((*tail)->next == NULL);
416		}
417	    }
418	  w->prev = NULL;
419	  w->next = NULL;
420	  if (rf_parityLogDebug)
421	    printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector);
422	  return(w);
423	}
424      else
425	w = w->prev;
426    }
427  if (!ignoreLocks)
428    RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
429  return(NULL);
430}
431
432static RF_ParityLogData_t *DequeueMatchingLogData(
433  RF_Raid_t            *raidPtr,
434  RF_ParityLogData_t  **head,
435  RF_ParityLogData_t  **tail)
436{
437  RF_ParityLogData_t *logDataList, *logData;
438  int regionID;
439
440  /* Remove and return an in-core parity log from the tail of
441     a disk queue (*head, *tail).  Then remove all matching
442     (identical regionIDs) logData and return as a linked list.
443
444     NON-BLOCKING
445     */
446
447  logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
448  if (logDataList)
449    {
450      regionID = logDataList->regionID;
451      logData = logDataList;
452      logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
453      while (logData->next)
454	{
455	  logData = logData->next;
456	  logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
457	}
458    }
459  return(logDataList);
460}
461
462
463static RF_ParityLog_t *AcquireParityLog(
464  RF_ParityLogData_t  *logData,
465  int                  finish)
466{
467  RF_ParityLog_t *log = NULL;
468  RF_Raid_t *raidPtr;
469
470  /* Grab a log buffer from the pool and return it.
471     If no buffers are available, return NULL.
472     NON-BLOCKING
473     */
474  raidPtr = logData->common->raidPtr;
475  RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
476  if (raidPtr->parityLogPool.parityLogs)
477    {
478      log = raidPtr->parityLogPool.parityLogs;
479      raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
480      log->regionID = logData->regionID;
481      log->numRecords = 0;
482      log->next = NULL;
483      raidPtr->logsInUse++;
484      RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
485    }
486  else
487    {
488      /* no logs available, so place ourselves on the queue of work waiting on log buffers
489	 this is done while parityLogPool.mutex is held, to ensure synchronization
490	 with ReleaseParityLogs.
491	 */
492      if (rf_parityLogDebug)
493	printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
494      if (finish)
495	RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
496      else
497	EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
498    }
499  RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
500  return(log);
501}
502
503void rf_ReleaseParityLogs(
504  RF_Raid_t       *raidPtr,
505  RF_ParityLog_t  *firstLog)
506{
507  RF_ParityLogData_t *logDataList;
508  RF_ParityLog_t *log, *lastLog;
509  int cnt;
510
511  /* Insert a linked list of parity logs (firstLog) to
512     the free list (parityLogPool.parityLogPool)
513
514     NON-BLOCKING.
515     */
516
517  RF_ASSERT(firstLog);
518
519  /* Before returning logs to global free list, service all
520     requests which are blocked on logs.  Holding mutexes for parityLogPool and parityLogDiskQueue
521     forces synchronization with AcquireParityLog().
522     */
523  RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
524  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
525  logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
526  log = firstLog;
527  if (firstLog)
528    firstLog = firstLog->next;
529  log->numRecords = 0;
530  log->next = NULL;
531  while (logDataList && log)
532    {
533      RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
534      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
535      rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
536      if (rf_parityLogDebug)
537	printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
538      if (log == NULL)
539	{
540	  log = firstLog;
541	  if (firstLog)
542	    {
543	      firstLog = firstLog->next;
544	      log->numRecords = 0;
545	      log->next = NULL;
546	    }
547	}
548      RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
549      RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
550      if (log)
551	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
552    }
553  /* return remaining logs to pool */
554  if (log)
555    {
556      log->next = firstLog;
557      firstLog = log;
558    }
559  if (firstLog)
560    {
561      lastLog = firstLog;
562      raidPtr->logsInUse--;
563      RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
564      while (lastLog->next)
565	{
566	  lastLog = lastLog->next;
567	  raidPtr->logsInUse--;
568	  RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
569	}
570      lastLog->next = raidPtr->parityLogPool.parityLogs;
571      raidPtr->parityLogPool.parityLogs = firstLog;
572      cnt = 0;
573      log = raidPtr->parityLogPool.parityLogs;
574      while (log)
575	{
576	  cnt++;
577	  log = log->next;
578	}
579      RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
580    }
581  RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
582  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
583}
584
585static void ReintLog(
586  RF_Raid_t       *raidPtr,
587  int              regionID,
588  RF_ParityLog_t  *log)
589{
590  RF_ASSERT(log);
591
592  /* Insert an in-core parity log (log) into the disk queue of reintegration
593     work.  Set the flag (reintInProgress) for the specified region (regionID)
594     to indicate that reintegration is in progress for this region.
595     NON-BLOCKING
596     */
597
598  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
599  raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;  /* cleared when reint complete */
600
601  if (rf_parityLogDebug)
602    printf("[requesting reintegration of region %d]\n", log->regionID);
603  /* move record to reintegration queue */
604  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
605  log->next = raidPtr->parityLogDiskQueue.reintQueue;
606  raidPtr->parityLogDiskQueue.reintQueue = log;
607  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
608  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
609  RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
610}
611
612static void FlushLog(
613  RF_Raid_t       *raidPtr,
614  RF_ParityLog_t  *log)
615{
616  /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue)
617     waiting to be written to disk.
618     NON-BLOCKING
619     */
620
621  RF_ASSERT(log);
622  RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
623  RF_ASSERT(log->next == NULL);
624  /* move log to flush queue */
625  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
626  log->next = raidPtr->parityLogDiskQueue.flushQueue;
627  raidPtr->parityLogDiskQueue.flushQueue = log;
628  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
629  RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
630}
631
632static int DumpParityLogToDisk(
633  int                  finish,
634  RF_ParityLogData_t  *logData)
635{
636  int i, diskCount, regionID = logData->regionID;
637  RF_ParityLog_t *log;
638  RF_Raid_t *raidPtr;
639
640  raidPtr = logData->common->raidPtr;
641
642  /* Move a core log to disk.  If the log disk is full, initiate
643     reintegration.
644
645     Return (0) if we can enqueue the dump immediately, otherwise
646     return (1) to indicate we are blocked on reintegration and
647     control of the thread should be relinquished.
648
649     Caller must hold regionInfo[regionID].mutex
650
651     NON-BLOCKING
652     */
653
654  if (rf_parityLogDebug)
655    printf("[dumping parity log to disk, region %d]\n", regionID);
656  log = raidPtr->regionInfo[regionID].coreLog;
657  RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
658  RF_ASSERT(log->next == NULL);
659
660  /* if reintegration is in progress, must queue work */
661  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
662  if (raidPtr->regionInfo[regionID].reintInProgress)
663    {
664      /* Can not proceed since this region is currently being reintegrated.
665	 We can not block, so queue remaining work and return */
666      if (rf_parityLogDebug)
667	printf("[region %d waiting on reintegration]\n",regionID);
668      /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */
669      if (finish)
670	RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
671      else
672	EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
673      RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
674      return(1);  /* relenquish control of this thread */
675    }
676  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
677  raidPtr->regionInfo[regionID].coreLog = NULL;
678  if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
679    /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */
680    {
681      /* update disk map for this region */
682      diskCount = raidPtr->regionInfo[regionID].diskCount;
683      for (i = 0; i < raidPtr->numSectorsPerLog; i++)
684	{
685	  raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
686	  raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
687	}
688      log->diskOffset = diskCount;
689      raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
690      FlushLog(raidPtr, log);
691    }
692  else
693    {
694      /* no room for log on disk, send it to disk manager and request reintegration */
695      RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
696      ReintLog(raidPtr, regionID, log);
697    }
698  if (rf_parityLogDebug)
699    printf("[finished dumping parity log to disk, region %d]\n", regionID);
700  return(0);
701}
702
703int rf_ParityLogAppend(
704  RF_ParityLogData_t   *logData,
705  int                   finish,
706  RF_ParityLog_t      **incomingLog,
707  int                   clearReintFlag)
708{
709  int regionID, logItem, itemDone;
710  RF_ParityLogData_t *item;
711  int punt, done = RF_FALSE;
712  RF_ParityLog_t *log;
713  RF_Raid_t *raidPtr;
714  RF_Etimer_t timer;
715  int (*wakeFunc)(RF_DagNode_t *node, int status);
716  void *wakeArg;
717
718  /* Add parity to the appropriate log, one sector at a time.
719     This routine is called is called by dag functions ParityLogUpdateFunc
720     and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
721
722     Parity to be logged is contained in a linked-list (logData).  When
723     this routine returns, every sector in the list will be in one of
724     three places:
725       1) entered into the parity log
726       2) queued, waiting on reintegration
727       3) queued, waiting on a core log
728
729     Blocked work is passed to the ParityLoggingDiskManager for completion.
730     Later, as conditions which required the block are removed, the work
731     reenters this routine with the "finish" parameter set to "RF_TRUE."
732
733     NON-BLOCKING
734     */
735
736  raidPtr = logData->common->raidPtr;
737  /* lock the region for the first item in logData */
738  RF_ASSERT(logData != NULL);
739  regionID = logData->regionID;
740  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
741  RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
742
743  if (clearReintFlag)
744    {
745      /* Enable flushing for this region.  Holding both locks provides
746	 a synchronization barrier with DumpParityLogToDisk
747	 */
748      RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
749      RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
750      RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
751      raidPtr->regionInfo[regionID].diskCount = 0;
752      raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
753      RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
754      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
755    }
756
757  /* process each item in logData */
758  while (logData)
759    {
760      /* remove an item from logData */
761      item = logData;
762      logData = logData->next;
763      item->next = NULL;
764      item->prev = NULL;
765
766      if (rf_parityLogDebug)
767	printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector);
768
769      /* see if we moved to a new region */
770      if (regionID != item->regionID)
771	{
772	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
773	  regionID = item->regionID;
774	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
775	  RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
776	}
777
778      punt = RF_FALSE;  /* Set to RF_TRUE if work is blocked.  This can happen in one of two ways:
779		          1) no core log (AcquireParityLog)
780			  2) waiting on reintegration (DumpParityLogToDisk)
781			If punt is RF_TRUE, the dataItem was queued, so skip to next item.
782			*/
783
784      /* process item, one sector at a time, until all sectors processed or we punt */
785      if (item->diskAddress.numSector > 0)
786	done = RF_FALSE;
787      else
788	RF_ASSERT(0);
789      while (!punt && !done)
790	{
791	  /* verify that a core log exists for this region */
792	  if (!raidPtr->regionInfo[regionID].coreLog)
793	    {
794	      /* Attempt to acquire a parity log.
795		 If acquisition fails, queue remaining work in data item and move to nextItem.
796		 */
797	      if (incomingLog)
798		if (*incomingLog)
799		  {
800		    RF_ASSERT((*incomingLog)->next == NULL);
801		    raidPtr->regionInfo[regionID].coreLog = *incomingLog;
802		    raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
803		    *incomingLog = NULL;
804		  }
805		else
806		  raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
807	      else
808		raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
809	      /* Note: AcquireParityLog either returns a log or enqueues currentItem */
810	    }
811	  if (!raidPtr->regionInfo[regionID].coreLog)
812	    punt = RF_TRUE; /* failed to find a core log */
813	  else
814	    {
815	      RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
816	      /* verify that the log has room for new entries */
817	      /* if log is full, dump it to disk and grab a new log */
818	      if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog)
819		{
820		  /* log is full, dump it to disk */
821		  if (DumpParityLogToDisk(finish, item))
822		    punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */
823		  else
824		    {
825		      /* dump was successful */
826		      if (incomingLog)
827			if (*incomingLog)
828			  {
829			    RF_ASSERT((*incomingLog)->next == NULL);
830			    raidPtr->regionInfo[regionID].coreLog = *incomingLog;
831			    raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
832			    *incomingLog = NULL;
833			  }
834			else
835			  raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
836		      else
837			raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
838		      /* if a core log is not available, must queue work and return */
839		      if (!raidPtr->regionInfo[regionID].coreLog)
840			punt = RF_TRUE; /* blocked on log availability */
841		    }
842		}
843	    }
844	  /* if we didn't punt on this item, attempt to add a sector to the core log */
845	  if (!punt)
846	    {
847	      RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
848	      /* at this point, we have a core log with enough room for a sector */
849	      /* copy a sector into the log */
850	      log = raidPtr->regionInfo[regionID].coreLog;
851	      RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
852	      logItem = log->numRecords++;
853	      log->records[logItem].parityAddr = item->diskAddress;
854	      RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
855	      RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
856	      log->records[logItem].parityAddr.numSector = 1;
857	      log->records[logItem].operation = item->common->operation;
858	      bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector));
859	      item->diskAddress.numSector--;
860	      item->diskAddress.startSector++;
861	      if (item->diskAddress.numSector == 0)
862		done = RF_TRUE;
863	    }
864	}
865
866      if (!punt)
867	{
868	  /* Processed this item completely, decrement count of items
869	     to be processed.
870	     */
871	  RF_ASSERT(item->diskAddress.numSector == 0);
872	  RF_LOCK_MUTEX(item->common->mutex);
873	  item->common->cnt--;
874	  if (item->common->cnt == 0)
875	    itemDone = RF_TRUE;
876	  else
877	    itemDone = RF_FALSE;
878	  RF_UNLOCK_MUTEX(item->common->mutex);
879	  if (itemDone)
880	    {
881	      /* Finished processing all log data for this IO
882		 Return structs to free list and invoke wakeup function.
883		 */
884	      timer = item->common->startTime;  /* grab initial value of timer */
885	      RF_ETIMER_STOP(timer);
886	      RF_ETIMER_EVAL(timer);
887	      item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
888	      if (rf_parityLogDebug)
889		printf("[waking process for region %d]\n", item->regionID);
890	      wakeFunc = item->common->wakeFunc;
891	      wakeArg = item->common->wakeArg;
892	      FreeParityLogCommonData(item->common);
893	      FreeParityLogData(item);
894	      (wakeFunc)(wakeArg, 0);
895	    }
896	  else
897	    FreeParityLogData(item);
898	}
899    }
900  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
901  if (rf_parityLogDebug)
902    printf("[exiting ParityLogAppend]\n");
903  return(0);
904}
905
906
907void rf_EnableParityLogging(RF_Raid_t *raidPtr)
908{
909  int regionID;
910
911  for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
912    RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
913    raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
914    RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
915  }
916  if (rf_parityLogDebug)
917    printf("[parity logging enabled]\n");
918}
919
920#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
921