rf_paritylog.c revision 1.2
1/* $NetBSD: rf_paritylog.c,v 1.2 1999/01/26 02:33:59 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* Code for manipulating in-core parity logs 30 * 31 */ 32 33#include "rf_archs.h" 34 35#if RF_INCLUDE_PARITYLOGGING > 0 36 37/* 38 * Append-only log for recording parity "update" and "overwrite" records 39 */ 40 41#include "rf_types.h" 42#include "rf_threadstuff.h" 43#include "rf_mcpair.h" 44#include "rf_raid.h" 45#include "rf_dag.h" 46#include "rf_dagfuncs.h" 47#include "rf_desc.h" 48#include "rf_layout.h" 49#include "rf_diskqueue.h" 50#include "rf_etimer.h" 51#include "rf_paritylog.h" 52#include "rf_general.h" 53#include "rf_threadid.h" 54#include "rf_map.h" 55#include "rf_paritylogging.h" 56#include "rf_paritylogDiskMgr.h" 57#include "rf_sys.h" 58 59static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr) 60{ 61 RF_CommonLogData_t *common = NULL; 62 int rc; 63 64 /* Return a struct for holding common parity log information from the free 65 list (rf_parityLogDiskQueue.freeCommonList). If the free list is empty, call 66 RF_Malloc to create a new structure. 67 NON-BLOCKING */ 68 69 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 70 if (raidPtr->parityLogDiskQueue.freeCommonList) 71 { 72 common = raidPtr->parityLogDiskQueue.freeCommonList; 73 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 74 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 75 } 76 else 77 { 78 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 79 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 80 rc = rf_mutex_init(&common->mutex); 81 if (rc) { 82 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 83 __LINE__, rc); 84 RF_Free(common, sizeof(RF_CommonLogData_t)); 85 common = NULL; 86 } 87 } 88 common->next = NULL; 89 return(common); 90} 91 92static void FreeParityLogCommonData(RF_CommonLogData_t *common) 93{ 94 RF_Raid_t *raidPtr; 95 96 /* Insert a single struct for holding parity log information 97 (data) into the free list (rf_parityLogDiskQueue.freeCommonList). 98 NON-BLOCKING */ 99 100 raidPtr = common->raidPtr; 101 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 102 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 103 raidPtr->parityLogDiskQueue.freeCommonList = common; 104 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 105} 106 107static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr) 108{ 109 RF_ParityLogData_t *data = NULL; 110 111 /* Return a struct for holding parity log information from the free 112 list (rf_parityLogDiskQueue.freeList). If the free list is empty, call 113 RF_Malloc to create a new structure. 114 NON-BLOCKING */ 115 116 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 117 if (raidPtr->parityLogDiskQueue.freeDataList) 118 { 119 data = raidPtr->parityLogDiskQueue.freeDataList; 120 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 121 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 122 } 123 else 124 { 125 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 126 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 127 } 128 data->next = NULL; 129 data->prev = NULL; 130 return(data); 131} 132 133 134static void FreeParityLogData(RF_ParityLogData_t *data) 135{ 136 RF_ParityLogData_t *nextItem; 137 RF_Raid_t *raidPtr; 138 139 /* Insert a linked list of structs for holding parity log 140 information (data) into the free list (parityLogDiskQueue.freeList). 141 NON-BLOCKING */ 142 143 raidPtr = data->common->raidPtr; 144 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 145 while (data) 146 { 147 nextItem = data->next; 148 data->next = raidPtr->parityLogDiskQueue.freeDataList; 149 raidPtr->parityLogDiskQueue.freeDataList = data; 150 data = nextItem; 151 } 152 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 153} 154 155 156static void EnqueueParityLogData( 157 RF_ParityLogData_t *data, 158 RF_ParityLogData_t **head, 159 RF_ParityLogData_t **tail) 160{ 161 RF_Raid_t *raidPtr; 162 163 /* Insert an in-core parity log (*data) into the head of 164 a disk queue (*head, *tail). 165 NON-BLOCKING */ 166 167 raidPtr = data->common->raidPtr; 168 if (rf_parityLogDebug) 169 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector); 170 RF_ASSERT(data->prev == NULL); 171 RF_ASSERT(data->next == NULL); 172 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 173 if (*head) 174 { 175 /* insert into head of queue */ 176 RF_ASSERT((*head)->prev == NULL); 177 RF_ASSERT((*tail)->next == NULL); 178 data->next = *head; 179 (*head)->prev = data; 180 *head = data; 181 } 182 else 183 { 184 /* insert into empty list */ 185 RF_ASSERT(*head == NULL); 186 RF_ASSERT(*tail == NULL); 187 *head = data; 188 *tail = data; 189 } 190 RF_ASSERT((*head)->prev == NULL); 191 RF_ASSERT((*tail)->next == NULL); 192 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 193} 194 195static RF_ParityLogData_t *DequeueParityLogData( 196 RF_Raid_t *raidPtr, 197 RF_ParityLogData_t **head, 198 RF_ParityLogData_t **tail, 199 int ignoreLocks) 200{ 201 RF_ParityLogData_t *data; 202 203 /* Remove and return an in-core parity log from the tail of 204 a disk queue (*head, *tail). 205 NON-BLOCKING */ 206 207 /* remove from tail, preserving FIFO order */ 208 if (!ignoreLocks) 209 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 210 data = *tail; 211 if (data) 212 { 213 if (*head == *tail) 214 { 215 /* removing last item from queue */ 216 *head = NULL; 217 *tail = NULL; 218 } 219 else 220 { 221 *tail = (*tail)->prev; 222 (*tail)->next = NULL; 223 RF_ASSERT((*head)->prev == NULL); 224 RF_ASSERT((*tail)->next == NULL); 225 } 226 data->next = NULL; 227 data->prev = NULL; 228 if (rf_parityLogDebug) 229 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector); 230 } 231 if (*head) 232 { 233 RF_ASSERT((*head)->prev == NULL); 234 RF_ASSERT((*tail)->next == NULL); 235 } 236 if (!ignoreLocks) 237 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 238 return(data); 239} 240 241 242static void RequeueParityLogData( 243 RF_ParityLogData_t *data, 244 RF_ParityLogData_t **head, 245 RF_ParityLogData_t **tail) 246{ 247 RF_Raid_t *raidPtr; 248 249 /* Insert an in-core parity log (*data) into the tail of 250 a disk queue (*head, *tail). 251 NON-BLOCKING */ 252 253 raidPtr = data->common->raidPtr; 254 RF_ASSERT(data); 255 if (rf_parityLogDebug) 256 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 257 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 258 if (*tail) 259 { 260 /* append to tail of list */ 261 data->prev = *tail; 262 data->next = NULL; 263 (*tail)->next = data; 264 *tail = data; 265 } 266 else 267 { 268 /* inserting into an empty list */ 269 *head = data; 270 *tail = data; 271 (*head)->prev = NULL; 272 (*tail)->next = NULL; 273 } 274 RF_ASSERT((*head)->prev == NULL); 275 RF_ASSERT((*tail)->next == NULL); 276 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 277} 278 279RF_ParityLogData_t *rf_CreateParityLogData( 280 RF_ParityRecordType_t operation, 281 RF_PhysDiskAddr_t *pda, 282 caddr_t bufPtr, 283 RF_Raid_t *raidPtr, 284 int (*wakeFunc)(RF_DagNode_t *node, int status), 285 void *wakeArg, 286 RF_AccTraceEntry_t *tracerec, 287 RF_Etimer_t startTime) 288{ 289 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 290 RF_CommonLogData_t *common; 291 RF_PhysDiskAddr_t *diskAddress; 292 int boundary, offset = 0; 293 294 /* Return an initialized struct of info to be logged. 295 Build one item per physical disk address, one item per region. 296 297 NON-BLOCKING */ 298 299 diskAddress = pda; 300 common = AllocParityLogCommonData(raidPtr); 301 RF_ASSERT(common); 302 303 common->operation = operation; 304 common->bufPtr = bufPtr; 305 common->raidPtr = raidPtr; 306 common->wakeFunc = wakeFunc; 307 common->wakeArg = wakeArg; 308 common->tracerec = tracerec; 309 common->startTime = startTime; 310 common->cnt = 0; 311 312 if (rf_parityLogDebug) 313 printf("[entering CreateParityLogData]\n"); 314 while (diskAddress) 315 { 316 common->cnt++; 317 data = AllocParityLogData(raidPtr); 318 RF_ASSERT(data); 319 data->common = common; 320 data->next = NULL; 321 data->prev = NULL; 322 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 323 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) 324 { 325 /* disk address does not cross a region boundary */ 326 data->diskAddress = *diskAddress; 327 data->bufOffset = offset; 328 offset = offset + diskAddress->numSector; 329 EnqueueParityLogData(data, &resultHead, &resultTail); 330 /* adjust disk address */ 331 diskAddress = diskAddress->next; 332 } 333 else 334 { 335 /* disk address crosses a region boundary */ 336 /* find address where region is crossed */ 337 boundary = 0; 338 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 339 boundary++; 340 341 /* enter data before the boundary */ 342 data->diskAddress = *diskAddress; 343 data->diskAddress.numSector = boundary; 344 data->bufOffset = offset; 345 offset += boundary; 346 EnqueueParityLogData(data, &resultHead, &resultTail); 347 /* adjust disk address */ 348 diskAddress->startSector += boundary; 349 diskAddress->numSector -= boundary; 350 } 351 } 352 if (rf_parityLogDebug) 353 printf("[leaving CreateParityLogData]\n"); 354 return(resultHead); 355} 356 357 358RF_ParityLogData_t *rf_SearchAndDequeueParityLogData( 359 RF_Raid_t *raidPtr, 360 int regionID, 361 RF_ParityLogData_t **head, 362 RF_ParityLogData_t **tail, 363 int ignoreLocks) 364{ 365 RF_ParityLogData_t *w; 366 367 /* Remove and return an in-core parity log from a specified region (regionID). 368 If a matching log is not found, return NULL. 369 370 NON-BLOCKING. 371 */ 372 373 /* walk backward through a list, looking for an entry with a matching region ID */ 374 if (!ignoreLocks) 375 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 376 w = (*tail); 377 while (w) 378 { 379 if (w->regionID == regionID) 380 { 381 /* remove an element from the list */ 382 if (w == *tail) 383 { 384 if (*head == *tail) 385 { 386 /* removing only element in the list */ 387 *head = NULL; 388 *tail = NULL; 389 } 390 else 391 { 392 /* removing last item in the list */ 393 *tail = (*tail)->prev; 394 (*tail)->next = NULL; 395 RF_ASSERT((*head)->prev == NULL); 396 RF_ASSERT((*tail)->next == NULL); 397 } 398 } 399 else 400 { 401 if (w == *head) 402 { 403 /* removing first item in the list */ 404 *head = (*head)->next; 405 (*head)->prev = NULL; 406 RF_ASSERT((*head)->prev == NULL); 407 RF_ASSERT((*tail)->next == NULL); 408 } 409 else 410 { 411 /* removing an item from the middle of the list */ 412 w->prev->next = w->next; 413 w->next->prev = w->prev; 414 RF_ASSERT((*head)->prev == NULL); 415 RF_ASSERT((*tail)->next == NULL); 416 } 417 } 418 w->prev = NULL; 419 w->next = NULL; 420 if (rf_parityLogDebug) 421 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector); 422 return(w); 423 } 424 else 425 w = w->prev; 426 } 427 if (!ignoreLocks) 428 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 429 return(NULL); 430} 431 432static RF_ParityLogData_t *DequeueMatchingLogData( 433 RF_Raid_t *raidPtr, 434 RF_ParityLogData_t **head, 435 RF_ParityLogData_t **tail) 436{ 437 RF_ParityLogData_t *logDataList, *logData; 438 int regionID; 439 440 /* Remove and return an in-core parity log from the tail of 441 a disk queue (*head, *tail). Then remove all matching 442 (identical regionIDs) logData and return as a linked list. 443 444 NON-BLOCKING 445 */ 446 447 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 448 if (logDataList) 449 { 450 regionID = logDataList->regionID; 451 logData = logDataList; 452 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 453 while (logData->next) 454 { 455 logData = logData->next; 456 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 457 } 458 } 459 return(logDataList); 460} 461 462 463static RF_ParityLog_t *AcquireParityLog( 464 RF_ParityLogData_t *logData, 465 int finish) 466{ 467 RF_ParityLog_t *log = NULL; 468 RF_Raid_t *raidPtr; 469 470 /* Grab a log buffer from the pool and return it. 471 If no buffers are available, return NULL. 472 NON-BLOCKING 473 */ 474 raidPtr = logData->common->raidPtr; 475 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 476 if (raidPtr->parityLogPool.parityLogs) 477 { 478 log = raidPtr->parityLogPool.parityLogs; 479 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 480 log->regionID = logData->regionID; 481 log->numRecords = 0; 482 log->next = NULL; 483 raidPtr->logsInUse++; 484 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 485 } 486 else 487 { 488 /* no logs available, so place ourselves on the queue of work waiting on log buffers 489 this is done while parityLogPool.mutex is held, to ensure synchronization 490 with ReleaseParityLogs. 491 */ 492 if (rf_parityLogDebug) 493 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 494 if (finish) 495 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 496 else 497 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 498 } 499 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 500 return(log); 501} 502 503void rf_ReleaseParityLogs( 504 RF_Raid_t *raidPtr, 505 RF_ParityLog_t *firstLog) 506{ 507 RF_ParityLogData_t *logDataList; 508 RF_ParityLog_t *log, *lastLog; 509 int cnt; 510 511 /* Insert a linked list of parity logs (firstLog) to 512 the free list (parityLogPool.parityLogPool) 513 514 NON-BLOCKING. 515 */ 516 517 RF_ASSERT(firstLog); 518 519 /* Before returning logs to global free list, service all 520 requests which are blocked on logs. Holding mutexes for parityLogPool and parityLogDiskQueue 521 forces synchronization with AcquireParityLog(). 522 */ 523 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 524 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 525 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 526 log = firstLog; 527 if (firstLog) 528 firstLog = firstLog->next; 529 log->numRecords = 0; 530 log->next = NULL; 531 while (logDataList && log) 532 { 533 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 534 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 535 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 536 if (rf_parityLogDebug) 537 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 538 if (log == NULL) 539 { 540 log = firstLog; 541 if (firstLog) 542 { 543 firstLog = firstLog->next; 544 log->numRecords = 0; 545 log->next = NULL; 546 } 547 } 548 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 549 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 550 if (log) 551 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 552 } 553 /* return remaining logs to pool */ 554 if (log) 555 { 556 log->next = firstLog; 557 firstLog = log; 558 } 559 if (firstLog) 560 { 561 lastLog = firstLog; 562 raidPtr->logsInUse--; 563 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 564 while (lastLog->next) 565 { 566 lastLog = lastLog->next; 567 raidPtr->logsInUse--; 568 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 569 } 570 lastLog->next = raidPtr->parityLogPool.parityLogs; 571 raidPtr->parityLogPool.parityLogs = firstLog; 572 cnt = 0; 573 log = raidPtr->parityLogPool.parityLogs; 574 while (log) 575 { 576 cnt++; 577 log = log->next; 578 } 579 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 580 } 581 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 582 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 583} 584 585static void ReintLog( 586 RF_Raid_t *raidPtr, 587 int regionID, 588 RF_ParityLog_t *log) 589{ 590 RF_ASSERT(log); 591 592 /* Insert an in-core parity log (log) into the disk queue of reintegration 593 work. Set the flag (reintInProgress) for the specified region (regionID) 594 to indicate that reintegration is in progress for this region. 595 NON-BLOCKING 596 */ 597 598 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 599 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint complete */ 600 601 if (rf_parityLogDebug) 602 printf("[requesting reintegration of region %d]\n", log->regionID); 603 /* move record to reintegration queue */ 604 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 605 log->next = raidPtr->parityLogDiskQueue.reintQueue; 606 raidPtr->parityLogDiskQueue.reintQueue = log; 607 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 608 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 609 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 610} 611 612static void FlushLog( 613 RF_Raid_t *raidPtr, 614 RF_ParityLog_t *log) 615{ 616 /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue) 617 waiting to be written to disk. 618 NON-BLOCKING 619 */ 620 621 RF_ASSERT(log); 622 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 623 RF_ASSERT(log->next == NULL); 624 /* move log to flush queue */ 625 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 626 log->next = raidPtr->parityLogDiskQueue.flushQueue; 627 raidPtr->parityLogDiskQueue.flushQueue = log; 628 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 629 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 630} 631 632static int DumpParityLogToDisk( 633 int finish, 634 RF_ParityLogData_t *logData) 635{ 636 int i, diskCount, regionID = logData->regionID; 637 RF_ParityLog_t *log; 638 RF_Raid_t *raidPtr; 639 640 raidPtr = logData->common->raidPtr; 641 642 /* Move a core log to disk. If the log disk is full, initiate 643 reintegration. 644 645 Return (0) if we can enqueue the dump immediately, otherwise 646 return (1) to indicate we are blocked on reintegration and 647 control of the thread should be relinquished. 648 649 Caller must hold regionInfo[regionID].mutex 650 651 NON-BLOCKING 652 */ 653 654 if (rf_parityLogDebug) 655 printf("[dumping parity log to disk, region %d]\n", regionID); 656 log = raidPtr->regionInfo[regionID].coreLog; 657 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 658 RF_ASSERT(log->next == NULL); 659 660 /* if reintegration is in progress, must queue work */ 661 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 662 if (raidPtr->regionInfo[regionID].reintInProgress) 663 { 664 /* Can not proceed since this region is currently being reintegrated. 665 We can not block, so queue remaining work and return */ 666 if (rf_parityLogDebug) 667 printf("[region %d waiting on reintegration]\n",regionID); 668 /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */ 669 if (finish) 670 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 671 else 672 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 673 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 674 return(1); /* relenquish control of this thread */ 675 } 676 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 677 raidPtr->regionInfo[regionID].coreLog = NULL; 678 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 679 /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */ 680 { 681 /* update disk map for this region */ 682 diskCount = raidPtr->regionInfo[regionID].diskCount; 683 for (i = 0; i < raidPtr->numSectorsPerLog; i++) 684 { 685 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 686 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 687 } 688 log->diskOffset = diskCount; 689 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 690 FlushLog(raidPtr, log); 691 } 692 else 693 { 694 /* no room for log on disk, send it to disk manager and request reintegration */ 695 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 696 ReintLog(raidPtr, regionID, log); 697 } 698 if (rf_parityLogDebug) 699 printf("[finished dumping parity log to disk, region %d]\n", regionID); 700 return(0); 701} 702 703int rf_ParityLogAppend( 704 RF_ParityLogData_t *logData, 705 int finish, 706 RF_ParityLog_t **incomingLog, 707 int clearReintFlag) 708{ 709 int regionID, logItem, itemDone; 710 RF_ParityLogData_t *item; 711 int punt, done = RF_FALSE; 712 RF_ParityLog_t *log; 713 RF_Raid_t *raidPtr; 714 RF_Etimer_t timer; 715 int (*wakeFunc)(RF_DagNode_t *node, int status); 716 void *wakeArg; 717 718 /* Add parity to the appropriate log, one sector at a time. 719 This routine is called is called by dag functions ParityLogUpdateFunc 720 and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 721 722 Parity to be logged is contained in a linked-list (logData). When 723 this routine returns, every sector in the list will be in one of 724 three places: 725 1) entered into the parity log 726 2) queued, waiting on reintegration 727 3) queued, waiting on a core log 728 729 Blocked work is passed to the ParityLoggingDiskManager for completion. 730 Later, as conditions which required the block are removed, the work 731 reenters this routine with the "finish" parameter set to "RF_TRUE." 732 733 NON-BLOCKING 734 */ 735 736 raidPtr = logData->common->raidPtr; 737 /* lock the region for the first item in logData */ 738 RF_ASSERT(logData != NULL); 739 regionID = logData->regionID; 740 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 741 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 742 743 if (clearReintFlag) 744 { 745 /* Enable flushing for this region. Holding both locks provides 746 a synchronization barrier with DumpParityLogToDisk 747 */ 748 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 749 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 750 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 751 raidPtr->regionInfo[regionID].diskCount = 0; 752 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 753 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */ 754 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 755 } 756 757 /* process each item in logData */ 758 while (logData) 759 { 760 /* remove an item from logData */ 761 item = logData; 762 logData = logData->next; 763 item->next = NULL; 764 item->prev = NULL; 765 766 if (rf_parityLogDebug) 767 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector); 768 769 /* see if we moved to a new region */ 770 if (regionID != item->regionID) 771 { 772 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 773 regionID = item->regionID; 774 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 775 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 776 } 777 778 punt = RF_FALSE; /* Set to RF_TRUE if work is blocked. This can happen in one of two ways: 779 1) no core log (AcquireParityLog) 780 2) waiting on reintegration (DumpParityLogToDisk) 781 If punt is RF_TRUE, the dataItem was queued, so skip to next item. 782 */ 783 784 /* process item, one sector at a time, until all sectors processed or we punt */ 785 if (item->diskAddress.numSector > 0) 786 done = RF_FALSE; 787 else 788 RF_ASSERT(0); 789 while (!punt && !done) 790 { 791 /* verify that a core log exists for this region */ 792 if (!raidPtr->regionInfo[regionID].coreLog) 793 { 794 /* Attempt to acquire a parity log. 795 If acquisition fails, queue remaining work in data item and move to nextItem. 796 */ 797 if (incomingLog) 798 if (*incomingLog) 799 { 800 RF_ASSERT((*incomingLog)->next == NULL); 801 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 802 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 803 *incomingLog = NULL; 804 } 805 else 806 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 807 else 808 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 809 /* Note: AcquireParityLog either returns a log or enqueues currentItem */ 810 } 811 if (!raidPtr->regionInfo[regionID].coreLog) 812 punt = RF_TRUE; /* failed to find a core log */ 813 else 814 { 815 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 816 /* verify that the log has room for new entries */ 817 /* if log is full, dump it to disk and grab a new log */ 818 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) 819 { 820 /* log is full, dump it to disk */ 821 if (DumpParityLogToDisk(finish, item)) 822 punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */ 823 else 824 { 825 /* dump was successful */ 826 if (incomingLog) 827 if (*incomingLog) 828 { 829 RF_ASSERT((*incomingLog)->next == NULL); 830 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 831 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 832 *incomingLog = NULL; 833 } 834 else 835 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 836 else 837 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 838 /* if a core log is not available, must queue work and return */ 839 if (!raidPtr->regionInfo[regionID].coreLog) 840 punt = RF_TRUE; /* blocked on log availability */ 841 } 842 } 843 } 844 /* if we didn't punt on this item, attempt to add a sector to the core log */ 845 if (!punt) 846 { 847 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 848 /* at this point, we have a core log with enough room for a sector */ 849 /* copy a sector into the log */ 850 log = raidPtr->regionInfo[regionID].coreLog; 851 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 852 logItem = log->numRecords++; 853 log->records[logItem].parityAddr = item->diskAddress; 854 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 855 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 856 log->records[logItem].parityAddr.numSector = 1; 857 log->records[logItem].operation = item->common->operation; 858 bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector)); 859 item->diskAddress.numSector--; 860 item->diskAddress.startSector++; 861 if (item->diskAddress.numSector == 0) 862 done = RF_TRUE; 863 } 864 } 865 866 if (!punt) 867 { 868 /* Processed this item completely, decrement count of items 869 to be processed. 870 */ 871 RF_ASSERT(item->diskAddress.numSector == 0); 872 RF_LOCK_MUTEX(item->common->mutex); 873 item->common->cnt--; 874 if (item->common->cnt == 0) 875 itemDone = RF_TRUE; 876 else 877 itemDone = RF_FALSE; 878 RF_UNLOCK_MUTEX(item->common->mutex); 879 if (itemDone) 880 { 881 /* Finished processing all log data for this IO 882 Return structs to free list and invoke wakeup function. 883 */ 884 timer = item->common->startTime; /* grab initial value of timer */ 885 RF_ETIMER_STOP(timer); 886 RF_ETIMER_EVAL(timer); 887 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 888 if (rf_parityLogDebug) 889 printf("[waking process for region %d]\n", item->regionID); 890 wakeFunc = item->common->wakeFunc; 891 wakeArg = item->common->wakeArg; 892 FreeParityLogCommonData(item->common); 893 FreeParityLogData(item); 894 (wakeFunc)(wakeArg, 0); 895 } 896 else 897 FreeParityLogData(item); 898 } 899 } 900 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 901 if (rf_parityLogDebug) 902 printf("[exiting ParityLogAppend]\n"); 903 return(0); 904} 905 906 907void rf_EnableParityLogging(RF_Raid_t *raidPtr) 908{ 909 int regionID; 910 911 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 912 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 913 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 914 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 915 } 916 if (rf_parityLogDebug) 917 printf("[parity logging enabled]\n"); 918} 919 920#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 921