rf_paritylog.c revision 1.5
1/* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* Code for manipulating in-core parity logs 30 * 31 */ 32 33#include "rf_archs.h" 34 35#if RF_INCLUDE_PARITYLOGGING > 0 36 37/* 38 * Append-only log for recording parity "update" and "overwrite" records 39 */ 40 41#include "rf_types.h" 42#include "rf_threadstuff.h" 43#include "rf_mcpair.h" 44#include "rf_raid.h" 45#include "rf_dag.h" 46#include "rf_dagfuncs.h" 47#include "rf_desc.h" 48#include "rf_layout.h" 49#include "rf_diskqueue.h" 50#include "rf_etimer.h" 51#include "rf_paritylog.h" 52#include "rf_general.h" 53#include "rf_map.h" 54#include "rf_paritylogging.h" 55#include "rf_paritylogDiskMgr.h" 56 57static RF_CommonLogData_t * 58AllocParityLogCommonData(RF_Raid_t * raidPtr) 59{ 60 RF_CommonLogData_t *common = NULL; 61 int rc; 62 63 /* Return a struct for holding common parity log information from the 64 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 65 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 66 67 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 68 if (raidPtr->parityLogDiskQueue.freeCommonList) { 69 common = raidPtr->parityLogDiskQueue.freeCommonList; 70 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 71 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 72 } else { 73 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 74 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 75 rc = rf_mutex_init(&common->mutex); 76 if (rc) { 77 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 78 __LINE__, rc); 79 RF_Free(common, sizeof(RF_CommonLogData_t)); 80 common = NULL; 81 } 82 } 83 common->next = NULL; 84 return (common); 85} 86 87static void 88FreeParityLogCommonData(RF_CommonLogData_t * common) 89{ 90 RF_Raid_t *raidPtr; 91 92 /* Insert a single struct for holding parity log information (data) 93 * into the free list (rf_parityLogDiskQueue.freeCommonList). 94 * NON-BLOCKING */ 95 96 raidPtr = common->raidPtr; 97 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 98 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 99 raidPtr->parityLogDiskQueue.freeCommonList = common; 100 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 101} 102 103static RF_ParityLogData_t * 104AllocParityLogData(RF_Raid_t * raidPtr) 105{ 106 RF_ParityLogData_t *data = NULL; 107 108 /* Return a struct for holding parity log information from the free 109 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 110 * call RF_Malloc to create a new structure. NON-BLOCKING */ 111 112 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 113 if (raidPtr->parityLogDiskQueue.freeDataList) { 114 data = raidPtr->parityLogDiskQueue.freeDataList; 115 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 116 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 117 } else { 118 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 119 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 120 } 121 data->next = NULL; 122 data->prev = NULL; 123 return (data); 124} 125 126 127static void 128FreeParityLogData(RF_ParityLogData_t * data) 129{ 130 RF_ParityLogData_t *nextItem; 131 RF_Raid_t *raidPtr; 132 133 /* Insert a linked list of structs for holding parity log information 134 * (data) into the free list (parityLogDiskQueue.freeList). 135 * NON-BLOCKING */ 136 137 raidPtr = data->common->raidPtr; 138 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 139 while (data) { 140 nextItem = data->next; 141 data->next = raidPtr->parityLogDiskQueue.freeDataList; 142 raidPtr->parityLogDiskQueue.freeDataList = data; 143 data = nextItem; 144 } 145 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 146} 147 148 149static void 150EnqueueParityLogData( 151 RF_ParityLogData_t * data, 152 RF_ParityLogData_t ** head, 153 RF_ParityLogData_t ** tail) 154{ 155 RF_Raid_t *raidPtr; 156 157 /* Insert an in-core parity log (*data) into the head of a disk queue 158 * (*head, *tail). NON-BLOCKING */ 159 160 raidPtr = data->common->raidPtr; 161 if (rf_parityLogDebug) 162 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 163 RF_ASSERT(data->prev == NULL); 164 RF_ASSERT(data->next == NULL); 165 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 166 if (*head) { 167 /* insert into head of queue */ 168 RF_ASSERT((*head)->prev == NULL); 169 RF_ASSERT((*tail)->next == NULL); 170 data->next = *head; 171 (*head)->prev = data; 172 *head = data; 173 } else { 174 /* insert into empty list */ 175 RF_ASSERT(*head == NULL); 176 RF_ASSERT(*tail == NULL); 177 *head = data; 178 *tail = data; 179 } 180 RF_ASSERT((*head)->prev == NULL); 181 RF_ASSERT((*tail)->next == NULL); 182 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 183} 184 185static RF_ParityLogData_t * 186DequeueParityLogData( 187 RF_Raid_t * raidPtr, 188 RF_ParityLogData_t ** head, 189 RF_ParityLogData_t ** tail, 190 int ignoreLocks) 191{ 192 RF_ParityLogData_t *data; 193 194 /* Remove and return an in-core parity log from the tail of a disk 195 * queue (*head, *tail). NON-BLOCKING */ 196 197 /* remove from tail, preserving FIFO order */ 198 if (!ignoreLocks) 199 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 200 data = *tail; 201 if (data) { 202 if (*head == *tail) { 203 /* removing last item from queue */ 204 *head = NULL; 205 *tail = NULL; 206 } else { 207 *tail = (*tail)->prev; 208 (*tail)->next = NULL; 209 RF_ASSERT((*head)->prev == NULL); 210 RF_ASSERT((*tail)->next == NULL); 211 } 212 data->next = NULL; 213 data->prev = NULL; 214 if (rf_parityLogDebug) 215 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 216 } 217 if (*head) { 218 RF_ASSERT((*head)->prev == NULL); 219 RF_ASSERT((*tail)->next == NULL); 220 } 221 if (!ignoreLocks) 222 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 223 return (data); 224} 225 226 227static void 228RequeueParityLogData( 229 RF_ParityLogData_t * data, 230 RF_ParityLogData_t ** head, 231 RF_ParityLogData_t ** tail) 232{ 233 RF_Raid_t *raidPtr; 234 235 /* Insert an in-core parity log (*data) into the tail of a disk queue 236 * (*head, *tail). NON-BLOCKING */ 237 238 raidPtr = data->common->raidPtr; 239 RF_ASSERT(data); 240 if (rf_parityLogDebug) 241 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 242 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 243 if (*tail) { 244 /* append to tail of list */ 245 data->prev = *tail; 246 data->next = NULL; 247 (*tail)->next = data; 248 *tail = data; 249 } else { 250 /* inserting into an empty list */ 251 *head = data; 252 *tail = data; 253 (*head)->prev = NULL; 254 (*tail)->next = NULL; 255 } 256 RF_ASSERT((*head)->prev == NULL); 257 RF_ASSERT((*tail)->next == NULL); 258 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 259} 260 261RF_ParityLogData_t * 262rf_CreateParityLogData( 263 RF_ParityRecordType_t operation, 264 RF_PhysDiskAddr_t * pda, 265 caddr_t bufPtr, 266 RF_Raid_t * raidPtr, 267 int (*wakeFunc) (RF_DagNode_t * node, int status), 268 void *wakeArg, 269 RF_AccTraceEntry_t * tracerec, 270 RF_Etimer_t startTime) 271{ 272 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 273 RF_CommonLogData_t *common; 274 RF_PhysDiskAddr_t *diskAddress; 275 int boundary, offset = 0; 276 277 /* Return an initialized struct of info to be logged. Build one item 278 * per physical disk address, one item per region. 279 * 280 * NON-BLOCKING */ 281 282 diskAddress = pda; 283 common = AllocParityLogCommonData(raidPtr); 284 RF_ASSERT(common); 285 286 common->operation = operation; 287 common->bufPtr = bufPtr; 288 common->raidPtr = raidPtr; 289 common->wakeFunc = wakeFunc; 290 common->wakeArg = wakeArg; 291 common->tracerec = tracerec; 292 common->startTime = startTime; 293 common->cnt = 0; 294 295 if (rf_parityLogDebug) 296 printf("[entering CreateParityLogData]\n"); 297 while (diskAddress) { 298 common->cnt++; 299 data = AllocParityLogData(raidPtr); 300 RF_ASSERT(data); 301 data->common = common; 302 data->next = NULL; 303 data->prev = NULL; 304 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 305 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 306 /* disk address does not cross a region boundary */ 307 data->diskAddress = *diskAddress; 308 data->bufOffset = offset; 309 offset = offset + diskAddress->numSector; 310 EnqueueParityLogData(data, &resultHead, &resultTail); 311 /* adjust disk address */ 312 diskAddress = diskAddress->next; 313 } else { 314 /* disk address crosses a region boundary */ 315 /* find address where region is crossed */ 316 boundary = 0; 317 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 318 boundary++; 319 320 /* enter data before the boundary */ 321 data->diskAddress = *diskAddress; 322 data->diskAddress.numSector = boundary; 323 data->bufOffset = offset; 324 offset += boundary; 325 EnqueueParityLogData(data, &resultHead, &resultTail); 326 /* adjust disk address */ 327 diskAddress->startSector += boundary; 328 diskAddress->numSector -= boundary; 329 } 330 } 331 if (rf_parityLogDebug) 332 printf("[leaving CreateParityLogData]\n"); 333 return (resultHead); 334} 335 336 337RF_ParityLogData_t * 338rf_SearchAndDequeueParityLogData( 339 RF_Raid_t * raidPtr, 340 int regionID, 341 RF_ParityLogData_t ** head, 342 RF_ParityLogData_t ** tail, 343 int ignoreLocks) 344{ 345 RF_ParityLogData_t *w; 346 347 /* Remove and return an in-core parity log from a specified region 348 * (regionID). If a matching log is not found, return NULL. 349 * 350 * NON-BLOCKING. */ 351 352 /* walk backward through a list, looking for an entry with a matching 353 * region ID */ 354 if (!ignoreLocks) 355 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 356 w = (*tail); 357 while (w) { 358 if (w->regionID == regionID) { 359 /* remove an element from the list */ 360 if (w == *tail) { 361 if (*head == *tail) { 362 /* removing only element in the list */ 363 *head = NULL; 364 *tail = NULL; 365 } else { 366 /* removing last item in the list */ 367 *tail = (*tail)->prev; 368 (*tail)->next = NULL; 369 RF_ASSERT((*head)->prev == NULL); 370 RF_ASSERT((*tail)->next == NULL); 371 } 372 } else { 373 if (w == *head) { 374 /* removing first item in the list */ 375 *head = (*head)->next; 376 (*head)->prev = NULL; 377 RF_ASSERT((*head)->prev == NULL); 378 RF_ASSERT((*tail)->next == NULL); 379 } else { 380 /* removing an item from the middle of 381 * the list */ 382 w->prev->next = w->next; 383 w->next->prev = w->prev; 384 RF_ASSERT((*head)->prev == NULL); 385 RF_ASSERT((*tail)->next == NULL); 386 } 387 } 388 w->prev = NULL; 389 w->next = NULL; 390 if (rf_parityLogDebug) 391 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 392 return (w); 393 } else 394 w = w->prev; 395 } 396 if (!ignoreLocks) 397 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 398 return (NULL); 399} 400 401static RF_ParityLogData_t * 402DequeueMatchingLogData( 403 RF_Raid_t * raidPtr, 404 RF_ParityLogData_t ** head, 405 RF_ParityLogData_t ** tail) 406{ 407 RF_ParityLogData_t *logDataList, *logData; 408 int regionID; 409 410 /* Remove and return an in-core parity log from the tail of a disk 411 * queue (*head, *tail). Then remove all matching (identical 412 * regionIDs) logData and return as a linked list. 413 * 414 * NON-BLOCKING */ 415 416 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 417 if (logDataList) { 418 regionID = logDataList->regionID; 419 logData = logDataList; 420 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 421 while (logData->next) { 422 logData = logData->next; 423 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 424 } 425 } 426 return (logDataList); 427} 428 429 430static RF_ParityLog_t * 431AcquireParityLog( 432 RF_ParityLogData_t * logData, 433 int finish) 434{ 435 RF_ParityLog_t *log = NULL; 436 RF_Raid_t *raidPtr; 437 438 /* Grab a log buffer from the pool and return it. If no buffers are 439 * available, return NULL. NON-BLOCKING */ 440 raidPtr = logData->common->raidPtr; 441 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 442 if (raidPtr->parityLogPool.parityLogs) { 443 log = raidPtr->parityLogPool.parityLogs; 444 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 445 log->regionID = logData->regionID; 446 log->numRecords = 0; 447 log->next = NULL; 448 raidPtr->logsInUse++; 449 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 450 } else { 451 /* no logs available, so place ourselves on the queue of work 452 * waiting on log buffers this is done while 453 * parityLogPool.mutex is held, to ensure synchronization with 454 * ReleaseParityLogs. */ 455 if (rf_parityLogDebug) 456 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 457 if (finish) 458 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 459 else 460 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 461 } 462 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 463 return (log); 464} 465 466void 467rf_ReleaseParityLogs( 468 RF_Raid_t * raidPtr, 469 RF_ParityLog_t * firstLog) 470{ 471 RF_ParityLogData_t *logDataList; 472 RF_ParityLog_t *log, *lastLog; 473 int cnt; 474 475 /* Insert a linked list of parity logs (firstLog) to the free list 476 * (parityLogPool.parityLogPool) 477 * 478 * NON-BLOCKING. */ 479 480 RF_ASSERT(firstLog); 481 482 /* Before returning logs to global free list, service all requests 483 * which are blocked on logs. Holding mutexes for parityLogPool and 484 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 485 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 486 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 487 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 488 log = firstLog; 489 if (firstLog) 490 firstLog = firstLog->next; 491 log->numRecords = 0; 492 log->next = NULL; 493 while (logDataList && log) { 494 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 495 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 496 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 497 if (rf_parityLogDebug) 498 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 499 if (log == NULL) { 500 log = firstLog; 501 if (firstLog) { 502 firstLog = firstLog->next; 503 log->numRecords = 0; 504 log->next = NULL; 505 } 506 } 507 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 508 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 509 if (log) 510 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 511 } 512 /* return remaining logs to pool */ 513 if (log) { 514 log->next = firstLog; 515 firstLog = log; 516 } 517 if (firstLog) { 518 lastLog = firstLog; 519 raidPtr->logsInUse--; 520 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 521 while (lastLog->next) { 522 lastLog = lastLog->next; 523 raidPtr->logsInUse--; 524 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 525 } 526 lastLog->next = raidPtr->parityLogPool.parityLogs; 527 raidPtr->parityLogPool.parityLogs = firstLog; 528 cnt = 0; 529 log = raidPtr->parityLogPool.parityLogs; 530 while (log) { 531 cnt++; 532 log = log->next; 533 } 534 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 535 } 536 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 537 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 538} 539 540static void 541ReintLog( 542 RF_Raid_t * raidPtr, 543 int regionID, 544 RF_ParityLog_t * log) 545{ 546 RF_ASSERT(log); 547 548 /* Insert an in-core parity log (log) into the disk queue of 549 * reintegration work. Set the flag (reintInProgress) for the 550 * specified region (regionID) to indicate that reintegration is in 551 * progress for this region. NON-BLOCKING */ 552 553 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 554 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 555 * complete */ 556 557 if (rf_parityLogDebug) 558 printf("[requesting reintegration of region %d]\n", log->regionID); 559 /* move record to reintegration queue */ 560 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 561 log->next = raidPtr->parityLogDiskQueue.reintQueue; 562 raidPtr->parityLogDiskQueue.reintQueue = log; 563 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 564 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 565 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 566} 567 568static void 569FlushLog( 570 RF_Raid_t * raidPtr, 571 RF_ParityLog_t * log) 572{ 573 /* insert a core log (log) into a list of logs 574 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 575 * NON-BLOCKING */ 576 577 RF_ASSERT(log); 578 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 579 RF_ASSERT(log->next == NULL); 580 /* move log to flush queue */ 581 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 582 log->next = raidPtr->parityLogDiskQueue.flushQueue; 583 raidPtr->parityLogDiskQueue.flushQueue = log; 584 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 585 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 586} 587 588static int 589DumpParityLogToDisk( 590 int finish, 591 RF_ParityLogData_t * logData) 592{ 593 int i, diskCount, regionID = logData->regionID; 594 RF_ParityLog_t *log; 595 RF_Raid_t *raidPtr; 596 597 raidPtr = logData->common->raidPtr; 598 599 /* Move a core log to disk. If the log disk is full, initiate 600 * reintegration. 601 * 602 * Return (0) if we can enqueue the dump immediately, otherwise return 603 * (1) to indicate we are blocked on reintegration and control of the 604 * thread should be relinquished. 605 * 606 * Caller must hold regionInfo[regionID].mutex 607 * 608 * NON-BLOCKING */ 609 610 if (rf_parityLogDebug) 611 printf("[dumping parity log to disk, region %d]\n", regionID); 612 log = raidPtr->regionInfo[regionID].coreLog; 613 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 614 RF_ASSERT(log->next == NULL); 615 616 /* if reintegration is in progress, must queue work */ 617 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 618 if (raidPtr->regionInfo[regionID].reintInProgress) { 619 /* Can not proceed since this region is currently being 620 * reintegrated. We can not block, so queue remaining work and 621 * return */ 622 if (rf_parityLogDebug) 623 printf("[region %d waiting on reintegration]\n", regionID); 624 /* XXX not sure about the use of finish - shouldn't this 625 * always be "Enqueue"? */ 626 if (finish) 627 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 628 else 629 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 630 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 631 return (1); /* relenquish control of this thread */ 632 } 633 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 634 raidPtr->regionInfo[regionID].coreLog = NULL; 635 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 636 /* IMPORTANT!! this loop bound assumes region disk holds an 637 * integral number of core logs */ 638 { 639 /* update disk map for this region */ 640 diskCount = raidPtr->regionInfo[regionID].diskCount; 641 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 642 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 643 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 644 } 645 log->diskOffset = diskCount; 646 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 647 FlushLog(raidPtr, log); 648 } else { 649 /* no room for log on disk, send it to disk manager and 650 * request reintegration */ 651 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 652 ReintLog(raidPtr, regionID, log); 653 } 654 if (rf_parityLogDebug) 655 printf("[finished dumping parity log to disk, region %d]\n", regionID); 656 return (0); 657} 658 659int 660rf_ParityLogAppend( 661 RF_ParityLogData_t * logData, 662 int finish, 663 RF_ParityLog_t ** incomingLog, 664 int clearReintFlag) 665{ 666 int regionID, logItem, itemDone; 667 RF_ParityLogData_t *item; 668 int punt, done = RF_FALSE; 669 RF_ParityLog_t *log; 670 RF_Raid_t *raidPtr; 671 RF_Etimer_t timer; 672 int (*wakeFunc) (RF_DagNode_t * node, int status); 673 void *wakeArg; 674 675 /* Add parity to the appropriate log, one sector at a time. This 676 * routine is called is called by dag functions ParityLogUpdateFunc 677 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 678 * 679 * Parity to be logged is contained in a linked-list (logData). When 680 * this routine returns, every sector in the list will be in one of 681 * three places: 1) entered into the parity log 2) queued, waiting on 682 * reintegration 3) queued, waiting on a core log 683 * 684 * Blocked work is passed to the ParityLoggingDiskManager for completion. 685 * Later, as conditions which required the block are removed, the work 686 * reenters this routine with the "finish" parameter set to "RF_TRUE." 687 * 688 * NON-BLOCKING */ 689 690 raidPtr = logData->common->raidPtr; 691 /* lock the region for the first item in logData */ 692 RF_ASSERT(logData != NULL); 693 regionID = logData->regionID; 694 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 695 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 696 697 if (clearReintFlag) { 698 /* Enable flushing for this region. Holding both locks 699 * provides a synchronization barrier with DumpParityLogToDisk */ 700 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 701 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 702 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 703 raidPtr->regionInfo[regionID].diskCount = 0; 704 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 705 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 706 * enabled */ 707 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 708 } 709 /* process each item in logData */ 710 while (logData) { 711 /* remove an item from logData */ 712 item = logData; 713 logData = logData->next; 714 item->next = NULL; 715 item->prev = NULL; 716 717 if (rf_parityLogDebug) 718 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 719 720 /* see if we moved to a new region */ 721 if (regionID != item->regionID) { 722 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 723 regionID = item->regionID; 724 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 725 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 726 } 727 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 728 * can happen in one of two ways: 1) no core 729 * log (AcquireParityLog) 2) waiting on 730 * reintegration (DumpParityLogToDisk) If punt 731 * is RF_TRUE, the dataItem was queued, so 732 * skip to next item. */ 733 734 /* process item, one sector at a time, until all sectors 735 * processed or we punt */ 736 if (item->diskAddress.numSector > 0) 737 done = RF_FALSE; 738 else 739 RF_ASSERT(0); 740 while (!punt && !done) { 741 /* verify that a core log exists for this region */ 742 if (!raidPtr->regionInfo[regionID].coreLog) { 743 /* Attempt to acquire a parity log. If 744 * acquisition fails, queue remaining work in 745 * data item and move to nextItem. */ 746 if (incomingLog) 747 if (*incomingLog) { 748 RF_ASSERT((*incomingLog)->next == NULL); 749 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 750 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 751 *incomingLog = NULL; 752 } else 753 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 754 else 755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 756 /* Note: AcquireParityLog either returns a log 757 * or enqueues currentItem */ 758 } 759 if (!raidPtr->regionInfo[regionID].coreLog) 760 punt = RF_TRUE; /* failed to find a core log */ 761 else { 762 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 763 /* verify that the log has room for new 764 * entries */ 765 /* if log is full, dump it to disk and grab a 766 * new log */ 767 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 768 /* log is full, dump it to disk */ 769 if (DumpParityLogToDisk(finish, item)) 770 punt = RF_TRUE; /* dump unsuccessful, 771 * blocked on 772 * reintegration */ 773 else { 774 /* dump was successful */ 775 if (incomingLog) 776 if (*incomingLog) { 777 RF_ASSERT((*incomingLog)->next == NULL); 778 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 779 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 780 *incomingLog = NULL; 781 } else 782 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 783 else 784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 785 /* if a core log is not 786 * available, must queue work 787 * and return */ 788 if (!raidPtr->regionInfo[regionID].coreLog) 789 punt = RF_TRUE; /* blocked on log 790 * availability */ 791 } 792 } 793 } 794 /* if we didn't punt on this item, attempt to add a 795 * sector to the core log */ 796 if (!punt) { 797 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 798 /* at this point, we have a core log with 799 * enough room for a sector */ 800 /* copy a sector into the log */ 801 log = raidPtr->regionInfo[regionID].coreLog; 802 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 803 logItem = log->numRecords++; 804 log->records[logItem].parityAddr = item->diskAddress; 805 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 806 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 807 log->records[logItem].parityAddr.numSector = 1; 808 log->records[logItem].operation = item->common->operation; 809 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); 810 item->diskAddress.numSector--; 811 item->diskAddress.startSector++; 812 if (item->diskAddress.numSector == 0) 813 done = RF_TRUE; 814 } 815 } 816 817 if (!punt) { 818 /* Processed this item completely, decrement count of 819 * items to be processed. */ 820 RF_ASSERT(item->diskAddress.numSector == 0); 821 RF_LOCK_MUTEX(item->common->mutex); 822 item->common->cnt--; 823 if (item->common->cnt == 0) 824 itemDone = RF_TRUE; 825 else 826 itemDone = RF_FALSE; 827 RF_UNLOCK_MUTEX(item->common->mutex); 828 if (itemDone) { 829 /* Finished processing all log data for this 830 * IO Return structs to free list and invoke 831 * wakeup function. */ 832 timer = item->common->startTime; /* grab initial value of 833 * timer */ 834 RF_ETIMER_STOP(timer); 835 RF_ETIMER_EVAL(timer); 836 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 837 if (rf_parityLogDebug) 838 printf("[waking process for region %d]\n", item->regionID); 839 wakeFunc = item->common->wakeFunc; 840 wakeArg = item->common->wakeArg; 841 FreeParityLogCommonData(item->common); 842 FreeParityLogData(item); 843 (wakeFunc) (wakeArg, 0); 844 } else 845 FreeParityLogData(item); 846 } 847 } 848 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 849 if (rf_parityLogDebug) 850 printf("[exiting ParityLogAppend]\n"); 851 return (0); 852} 853 854 855void 856rf_EnableParityLogging(RF_Raid_t * raidPtr) 857{ 858 int regionID; 859 860 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 861 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 862 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 863 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 864 } 865 if (rf_parityLogDebug) 866 printf("[parity logging enabled]\n"); 867} 868#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 869