rf_paritylog.c revision 1.3
1/* $NetBSD: rf_paritylog.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* Code for manipulating in-core parity logs 30 * 31 */ 32 33#include "rf_archs.h" 34 35#if RF_INCLUDE_PARITYLOGGING > 0 36 37/* 38 * Append-only log for recording parity "update" and "overwrite" records 39 */ 40 41#include "rf_types.h" 42#include "rf_threadstuff.h" 43#include "rf_mcpair.h" 44#include "rf_raid.h" 45#include "rf_dag.h" 46#include "rf_dagfuncs.h" 47#include "rf_desc.h" 48#include "rf_layout.h" 49#include "rf_diskqueue.h" 50#include "rf_etimer.h" 51#include "rf_paritylog.h" 52#include "rf_general.h" 53#include "rf_threadid.h" 54#include "rf_map.h" 55#include "rf_paritylogging.h" 56#include "rf_paritylogDiskMgr.h" 57#include "rf_sys.h" 58 59static RF_CommonLogData_t * 60AllocParityLogCommonData(RF_Raid_t * raidPtr) 61{ 62 RF_CommonLogData_t *common = NULL; 63 int rc; 64 65 /* Return a struct for holding common parity log information from the 66 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 67 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 68 69 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 70 if (raidPtr->parityLogDiskQueue.freeCommonList) { 71 common = raidPtr->parityLogDiskQueue.freeCommonList; 72 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 73 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 74 } else { 75 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 76 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 77 rc = rf_mutex_init(&common->mutex); 78 if (rc) { 79 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 80 __LINE__, rc); 81 RF_Free(common, sizeof(RF_CommonLogData_t)); 82 common = NULL; 83 } 84 } 85 common->next = NULL; 86 return (common); 87} 88 89static void 90FreeParityLogCommonData(RF_CommonLogData_t * common) 91{ 92 RF_Raid_t *raidPtr; 93 94 /* Insert a single struct for holding parity log information (data) 95 * into the free list (rf_parityLogDiskQueue.freeCommonList). 96 * NON-BLOCKING */ 97 98 raidPtr = common->raidPtr; 99 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 100 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 101 raidPtr->parityLogDiskQueue.freeCommonList = common; 102 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 103} 104 105static RF_ParityLogData_t * 106AllocParityLogData(RF_Raid_t * raidPtr) 107{ 108 RF_ParityLogData_t *data = NULL; 109 110 /* Return a struct for holding parity log information from the free 111 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 112 * call RF_Malloc to create a new structure. NON-BLOCKING */ 113 114 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 115 if (raidPtr->parityLogDiskQueue.freeDataList) { 116 data = raidPtr->parityLogDiskQueue.freeDataList; 117 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 118 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 119 } else { 120 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 121 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 122 } 123 data->next = NULL; 124 data->prev = NULL; 125 return (data); 126} 127 128 129static void 130FreeParityLogData(RF_ParityLogData_t * data) 131{ 132 RF_ParityLogData_t *nextItem; 133 RF_Raid_t *raidPtr; 134 135 /* Insert a linked list of structs for holding parity log information 136 * (data) into the free list (parityLogDiskQueue.freeList). 137 * NON-BLOCKING */ 138 139 raidPtr = data->common->raidPtr; 140 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 141 while (data) { 142 nextItem = data->next; 143 data->next = raidPtr->parityLogDiskQueue.freeDataList; 144 raidPtr->parityLogDiskQueue.freeDataList = data; 145 data = nextItem; 146 } 147 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 148} 149 150 151static void 152EnqueueParityLogData( 153 RF_ParityLogData_t * data, 154 RF_ParityLogData_t ** head, 155 RF_ParityLogData_t ** tail) 156{ 157 RF_Raid_t *raidPtr; 158 159 /* Insert an in-core parity log (*data) into the head of a disk queue 160 * (*head, *tail). NON-BLOCKING */ 161 162 raidPtr = data->common->raidPtr; 163 if (rf_parityLogDebug) 164 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 165 RF_ASSERT(data->prev == NULL); 166 RF_ASSERT(data->next == NULL); 167 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 168 if (*head) { 169 /* insert into head of queue */ 170 RF_ASSERT((*head)->prev == NULL); 171 RF_ASSERT((*tail)->next == NULL); 172 data->next = *head; 173 (*head)->prev = data; 174 *head = data; 175 } else { 176 /* insert into empty list */ 177 RF_ASSERT(*head == NULL); 178 RF_ASSERT(*tail == NULL); 179 *head = data; 180 *tail = data; 181 } 182 RF_ASSERT((*head)->prev == NULL); 183 RF_ASSERT((*tail)->next == NULL); 184 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 185} 186 187static RF_ParityLogData_t * 188DequeueParityLogData( 189 RF_Raid_t * raidPtr, 190 RF_ParityLogData_t ** head, 191 RF_ParityLogData_t ** tail, 192 int ignoreLocks) 193{ 194 RF_ParityLogData_t *data; 195 196 /* Remove and return an in-core parity log from the tail of a disk 197 * queue (*head, *tail). NON-BLOCKING */ 198 199 /* remove from tail, preserving FIFO order */ 200 if (!ignoreLocks) 201 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 202 data = *tail; 203 if (data) { 204 if (*head == *tail) { 205 /* removing last item from queue */ 206 *head = NULL; 207 *tail = NULL; 208 } else { 209 *tail = (*tail)->prev; 210 (*tail)->next = NULL; 211 RF_ASSERT((*head)->prev == NULL); 212 RF_ASSERT((*tail)->next == NULL); 213 } 214 data->next = NULL; 215 data->prev = NULL; 216 if (rf_parityLogDebug) 217 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 218 } 219 if (*head) { 220 RF_ASSERT((*head)->prev == NULL); 221 RF_ASSERT((*tail)->next == NULL); 222 } 223 if (!ignoreLocks) 224 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 225 return (data); 226} 227 228 229static void 230RequeueParityLogData( 231 RF_ParityLogData_t * data, 232 RF_ParityLogData_t ** head, 233 RF_ParityLogData_t ** tail) 234{ 235 RF_Raid_t *raidPtr; 236 237 /* Insert an in-core parity log (*data) into the tail of a disk queue 238 * (*head, *tail). NON-BLOCKING */ 239 240 raidPtr = data->common->raidPtr; 241 RF_ASSERT(data); 242 if (rf_parityLogDebug) 243 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 244 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 245 if (*tail) { 246 /* append to tail of list */ 247 data->prev = *tail; 248 data->next = NULL; 249 (*tail)->next = data; 250 *tail = data; 251 } else { 252 /* inserting into an empty list */ 253 *head = data; 254 *tail = data; 255 (*head)->prev = NULL; 256 (*tail)->next = NULL; 257 } 258 RF_ASSERT((*head)->prev == NULL); 259 RF_ASSERT((*tail)->next == NULL); 260 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 261} 262 263RF_ParityLogData_t * 264rf_CreateParityLogData( 265 RF_ParityRecordType_t operation, 266 RF_PhysDiskAddr_t * pda, 267 caddr_t bufPtr, 268 RF_Raid_t * raidPtr, 269 int (*wakeFunc) (RF_DagNode_t * node, int status), 270 void *wakeArg, 271 RF_AccTraceEntry_t * tracerec, 272 RF_Etimer_t startTime) 273{ 274 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 275 RF_CommonLogData_t *common; 276 RF_PhysDiskAddr_t *diskAddress; 277 int boundary, offset = 0; 278 279 /* Return an initialized struct of info to be logged. Build one item 280 * per physical disk address, one item per region. 281 * 282 * NON-BLOCKING */ 283 284 diskAddress = pda; 285 common = AllocParityLogCommonData(raidPtr); 286 RF_ASSERT(common); 287 288 common->operation = operation; 289 common->bufPtr = bufPtr; 290 common->raidPtr = raidPtr; 291 common->wakeFunc = wakeFunc; 292 common->wakeArg = wakeArg; 293 common->tracerec = tracerec; 294 common->startTime = startTime; 295 common->cnt = 0; 296 297 if (rf_parityLogDebug) 298 printf("[entering CreateParityLogData]\n"); 299 while (diskAddress) { 300 common->cnt++; 301 data = AllocParityLogData(raidPtr); 302 RF_ASSERT(data); 303 data->common = common; 304 data->next = NULL; 305 data->prev = NULL; 306 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 307 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 308 /* disk address does not cross a region boundary */ 309 data->diskAddress = *diskAddress; 310 data->bufOffset = offset; 311 offset = offset + diskAddress->numSector; 312 EnqueueParityLogData(data, &resultHead, &resultTail); 313 /* adjust disk address */ 314 diskAddress = diskAddress->next; 315 } else { 316 /* disk address crosses a region boundary */ 317 /* find address where region is crossed */ 318 boundary = 0; 319 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 320 boundary++; 321 322 /* enter data before the boundary */ 323 data->diskAddress = *diskAddress; 324 data->diskAddress.numSector = boundary; 325 data->bufOffset = offset; 326 offset += boundary; 327 EnqueueParityLogData(data, &resultHead, &resultTail); 328 /* adjust disk address */ 329 diskAddress->startSector += boundary; 330 diskAddress->numSector -= boundary; 331 } 332 } 333 if (rf_parityLogDebug) 334 printf("[leaving CreateParityLogData]\n"); 335 return (resultHead); 336} 337 338 339RF_ParityLogData_t * 340rf_SearchAndDequeueParityLogData( 341 RF_Raid_t * raidPtr, 342 int regionID, 343 RF_ParityLogData_t ** head, 344 RF_ParityLogData_t ** tail, 345 int ignoreLocks) 346{ 347 RF_ParityLogData_t *w; 348 349 /* Remove and return an in-core parity log from a specified region 350 * (regionID). If a matching log is not found, return NULL. 351 * 352 * NON-BLOCKING. */ 353 354 /* walk backward through a list, looking for an entry with a matching 355 * region ID */ 356 if (!ignoreLocks) 357 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 358 w = (*tail); 359 while (w) { 360 if (w->regionID == regionID) { 361 /* remove an element from the list */ 362 if (w == *tail) { 363 if (*head == *tail) { 364 /* removing only element in the list */ 365 *head = NULL; 366 *tail = NULL; 367 } else { 368 /* removing last item in the list */ 369 *tail = (*tail)->prev; 370 (*tail)->next = NULL; 371 RF_ASSERT((*head)->prev == NULL); 372 RF_ASSERT((*tail)->next == NULL); 373 } 374 } else { 375 if (w == *head) { 376 /* removing first item in the list */ 377 *head = (*head)->next; 378 (*head)->prev = NULL; 379 RF_ASSERT((*head)->prev == NULL); 380 RF_ASSERT((*tail)->next == NULL); 381 } else { 382 /* removing an item from the middle of 383 * the list */ 384 w->prev->next = w->next; 385 w->next->prev = w->prev; 386 RF_ASSERT((*head)->prev == NULL); 387 RF_ASSERT((*tail)->next == NULL); 388 } 389 } 390 w->prev = NULL; 391 w->next = NULL; 392 if (rf_parityLogDebug) 393 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 394 return (w); 395 } else 396 w = w->prev; 397 } 398 if (!ignoreLocks) 399 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 400 return (NULL); 401} 402 403static RF_ParityLogData_t * 404DequeueMatchingLogData( 405 RF_Raid_t * raidPtr, 406 RF_ParityLogData_t ** head, 407 RF_ParityLogData_t ** tail) 408{ 409 RF_ParityLogData_t *logDataList, *logData; 410 int regionID; 411 412 /* Remove and return an in-core parity log from the tail of a disk 413 * queue (*head, *tail). Then remove all matching (identical 414 * regionIDs) logData and return as a linked list. 415 * 416 * NON-BLOCKING */ 417 418 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 419 if (logDataList) { 420 regionID = logDataList->regionID; 421 logData = logDataList; 422 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 423 while (logData->next) { 424 logData = logData->next; 425 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 426 } 427 } 428 return (logDataList); 429} 430 431 432static RF_ParityLog_t * 433AcquireParityLog( 434 RF_ParityLogData_t * logData, 435 int finish) 436{ 437 RF_ParityLog_t *log = NULL; 438 RF_Raid_t *raidPtr; 439 440 /* Grab a log buffer from the pool and return it. If no buffers are 441 * available, return NULL. NON-BLOCKING */ 442 raidPtr = logData->common->raidPtr; 443 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 444 if (raidPtr->parityLogPool.parityLogs) { 445 log = raidPtr->parityLogPool.parityLogs; 446 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 447 log->regionID = logData->regionID; 448 log->numRecords = 0; 449 log->next = NULL; 450 raidPtr->logsInUse++; 451 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 452 } else { 453 /* no logs available, so place ourselves on the queue of work 454 * waiting on log buffers this is done while 455 * parityLogPool.mutex is held, to ensure synchronization with 456 * ReleaseParityLogs. */ 457 if (rf_parityLogDebug) 458 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 459 if (finish) 460 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 461 else 462 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 463 } 464 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 465 return (log); 466} 467 468void 469rf_ReleaseParityLogs( 470 RF_Raid_t * raidPtr, 471 RF_ParityLog_t * firstLog) 472{ 473 RF_ParityLogData_t *logDataList; 474 RF_ParityLog_t *log, *lastLog; 475 int cnt; 476 477 /* Insert a linked list of parity logs (firstLog) to the free list 478 * (parityLogPool.parityLogPool) 479 * 480 * NON-BLOCKING. */ 481 482 RF_ASSERT(firstLog); 483 484 /* Before returning logs to global free list, service all requests 485 * which are blocked on logs. Holding mutexes for parityLogPool and 486 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 487 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 488 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 489 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 490 log = firstLog; 491 if (firstLog) 492 firstLog = firstLog->next; 493 log->numRecords = 0; 494 log->next = NULL; 495 while (logDataList && log) { 496 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 497 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 498 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 499 if (rf_parityLogDebug) 500 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 501 if (log == NULL) { 502 log = firstLog; 503 if (firstLog) { 504 firstLog = firstLog->next; 505 log->numRecords = 0; 506 log->next = NULL; 507 } 508 } 509 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 510 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 511 if (log) 512 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 513 } 514 /* return remaining logs to pool */ 515 if (log) { 516 log->next = firstLog; 517 firstLog = log; 518 } 519 if (firstLog) { 520 lastLog = firstLog; 521 raidPtr->logsInUse--; 522 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 523 while (lastLog->next) { 524 lastLog = lastLog->next; 525 raidPtr->logsInUse--; 526 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 527 } 528 lastLog->next = raidPtr->parityLogPool.parityLogs; 529 raidPtr->parityLogPool.parityLogs = firstLog; 530 cnt = 0; 531 log = raidPtr->parityLogPool.parityLogs; 532 while (log) { 533 cnt++; 534 log = log->next; 535 } 536 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 537 } 538 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 539 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 540} 541 542static void 543ReintLog( 544 RF_Raid_t * raidPtr, 545 int regionID, 546 RF_ParityLog_t * log) 547{ 548 RF_ASSERT(log); 549 550 /* Insert an in-core parity log (log) into the disk queue of 551 * reintegration work. Set the flag (reintInProgress) for the 552 * specified region (regionID) to indicate that reintegration is in 553 * progress for this region. NON-BLOCKING */ 554 555 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 556 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 557 * complete */ 558 559 if (rf_parityLogDebug) 560 printf("[requesting reintegration of region %d]\n", log->regionID); 561 /* move record to reintegration queue */ 562 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 563 log->next = raidPtr->parityLogDiskQueue.reintQueue; 564 raidPtr->parityLogDiskQueue.reintQueue = log; 565 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 566 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 567 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 568} 569 570static void 571FlushLog( 572 RF_Raid_t * raidPtr, 573 RF_ParityLog_t * log) 574{ 575 /* insert a core log (log) into a list of logs 576 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 577 * NON-BLOCKING */ 578 579 RF_ASSERT(log); 580 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 581 RF_ASSERT(log->next == NULL); 582 /* move log to flush queue */ 583 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 584 log->next = raidPtr->parityLogDiskQueue.flushQueue; 585 raidPtr->parityLogDiskQueue.flushQueue = log; 586 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 587 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 588} 589 590static int 591DumpParityLogToDisk( 592 int finish, 593 RF_ParityLogData_t * logData) 594{ 595 int i, diskCount, regionID = logData->regionID; 596 RF_ParityLog_t *log; 597 RF_Raid_t *raidPtr; 598 599 raidPtr = logData->common->raidPtr; 600 601 /* Move a core log to disk. If the log disk is full, initiate 602 * reintegration. 603 * 604 * Return (0) if we can enqueue the dump immediately, otherwise return 605 * (1) to indicate we are blocked on reintegration and control of the 606 * thread should be relinquished. 607 * 608 * Caller must hold regionInfo[regionID].mutex 609 * 610 * NON-BLOCKING */ 611 612 if (rf_parityLogDebug) 613 printf("[dumping parity log to disk, region %d]\n", regionID); 614 log = raidPtr->regionInfo[regionID].coreLog; 615 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 616 RF_ASSERT(log->next == NULL); 617 618 /* if reintegration is in progress, must queue work */ 619 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 620 if (raidPtr->regionInfo[regionID].reintInProgress) { 621 /* Can not proceed since this region is currently being 622 * reintegrated. We can not block, so queue remaining work and 623 * return */ 624 if (rf_parityLogDebug) 625 printf("[region %d waiting on reintegration]\n", regionID); 626 /* XXX not sure about the use of finish - shouldn't this 627 * always be "Enqueue"? */ 628 if (finish) 629 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 630 else 631 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 632 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 633 return (1); /* relenquish control of this thread */ 634 } 635 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 636 raidPtr->regionInfo[regionID].coreLog = NULL; 637 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 638 /* IMPORTANT!! this loop bound assumes region disk holds an 639 * integral number of core logs */ 640 { 641 /* update disk map for this region */ 642 diskCount = raidPtr->regionInfo[regionID].diskCount; 643 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 644 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 645 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 646 } 647 log->diskOffset = diskCount; 648 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 649 FlushLog(raidPtr, log); 650 } else { 651 /* no room for log on disk, send it to disk manager and 652 * request reintegration */ 653 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 654 ReintLog(raidPtr, regionID, log); 655 } 656 if (rf_parityLogDebug) 657 printf("[finished dumping parity log to disk, region %d]\n", regionID); 658 return (0); 659} 660 661int 662rf_ParityLogAppend( 663 RF_ParityLogData_t * logData, 664 int finish, 665 RF_ParityLog_t ** incomingLog, 666 int clearReintFlag) 667{ 668 int regionID, logItem, itemDone; 669 RF_ParityLogData_t *item; 670 int punt, done = RF_FALSE; 671 RF_ParityLog_t *log; 672 RF_Raid_t *raidPtr; 673 RF_Etimer_t timer; 674 int (*wakeFunc) (RF_DagNode_t * node, int status); 675 void *wakeArg; 676 677 /* Add parity to the appropriate log, one sector at a time. This 678 * routine is called is called by dag functions ParityLogUpdateFunc 679 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 680 * 681 * Parity to be logged is contained in a linked-list (logData). When 682 * this routine returns, every sector in the list will be in one of 683 * three places: 1) entered into the parity log 2) queued, waiting on 684 * reintegration 3) queued, waiting on a core log 685 * 686 * Blocked work is passed to the ParityLoggingDiskManager for completion. 687 * Later, as conditions which required the block are removed, the work 688 * reenters this routine with the "finish" parameter set to "RF_TRUE." 689 * 690 * NON-BLOCKING */ 691 692 raidPtr = logData->common->raidPtr; 693 /* lock the region for the first item in logData */ 694 RF_ASSERT(logData != NULL); 695 regionID = logData->regionID; 696 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 697 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 698 699 if (clearReintFlag) { 700 /* Enable flushing for this region. Holding both locks 701 * provides a synchronization barrier with DumpParityLogToDisk */ 702 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 703 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 704 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 705 raidPtr->regionInfo[regionID].diskCount = 0; 706 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 707 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 708 * enabled */ 709 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 710 } 711 /* process each item in logData */ 712 while (logData) { 713 /* remove an item from logData */ 714 item = logData; 715 logData = logData->next; 716 item->next = NULL; 717 item->prev = NULL; 718 719 if (rf_parityLogDebug) 720 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 721 722 /* see if we moved to a new region */ 723 if (regionID != item->regionID) { 724 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 725 regionID = item->regionID; 726 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 727 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 728 } 729 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 730 * can happen in one of two ways: 1) no core 731 * log (AcquireParityLog) 2) waiting on 732 * reintegration (DumpParityLogToDisk) If punt 733 * is RF_TRUE, the dataItem was queued, so 734 * skip to next item. */ 735 736 /* process item, one sector at a time, until all sectors 737 * processed or we punt */ 738 if (item->diskAddress.numSector > 0) 739 done = RF_FALSE; 740 else 741 RF_ASSERT(0); 742 while (!punt && !done) { 743 /* verify that a core log exists for this region */ 744 if (!raidPtr->regionInfo[regionID].coreLog) { 745 /* Attempt to acquire a parity log. If 746 * acquisition fails, queue remaining work in 747 * data item and move to nextItem. */ 748 if (incomingLog) 749 if (*incomingLog) { 750 RF_ASSERT((*incomingLog)->next == NULL); 751 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 752 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 753 *incomingLog = NULL; 754 } else 755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 756 else 757 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 758 /* Note: AcquireParityLog either returns a log 759 * or enqueues currentItem */ 760 } 761 if (!raidPtr->regionInfo[regionID].coreLog) 762 punt = RF_TRUE; /* failed to find a core log */ 763 else { 764 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 765 /* verify that the log has room for new 766 * entries */ 767 /* if log is full, dump it to disk and grab a 768 * new log */ 769 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 770 /* log is full, dump it to disk */ 771 if (DumpParityLogToDisk(finish, item)) 772 punt = RF_TRUE; /* dump unsuccessful, 773 * blocked on 774 * reintegration */ 775 else { 776 /* dump was successful */ 777 if (incomingLog) 778 if (*incomingLog) { 779 RF_ASSERT((*incomingLog)->next == NULL); 780 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 781 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 782 *incomingLog = NULL; 783 } else 784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 785 else 786 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 787 /* if a core log is not 788 * available, must queue work 789 * and return */ 790 if (!raidPtr->regionInfo[regionID].coreLog) 791 punt = RF_TRUE; /* blocked on log 792 * availability */ 793 } 794 } 795 } 796 /* if we didn't punt on this item, attempt to add a 797 * sector to the core log */ 798 if (!punt) { 799 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 800 /* at this point, we have a core log with 801 * enough room for a sector */ 802 /* copy a sector into the log */ 803 log = raidPtr->regionInfo[regionID].coreLog; 804 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 805 logItem = log->numRecords++; 806 log->records[logItem].parityAddr = item->diskAddress; 807 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 808 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 809 log->records[logItem].parityAddr.numSector = 1; 810 log->records[logItem].operation = item->common->operation; 811 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); 812 item->diskAddress.numSector--; 813 item->diskAddress.startSector++; 814 if (item->diskAddress.numSector == 0) 815 done = RF_TRUE; 816 } 817 } 818 819 if (!punt) { 820 /* Processed this item completely, decrement count of 821 * items to be processed. */ 822 RF_ASSERT(item->diskAddress.numSector == 0); 823 RF_LOCK_MUTEX(item->common->mutex); 824 item->common->cnt--; 825 if (item->common->cnt == 0) 826 itemDone = RF_TRUE; 827 else 828 itemDone = RF_FALSE; 829 RF_UNLOCK_MUTEX(item->common->mutex); 830 if (itemDone) { 831 /* Finished processing all log data for this 832 * IO Return structs to free list and invoke 833 * wakeup function. */ 834 timer = item->common->startTime; /* grab initial value of 835 * timer */ 836 RF_ETIMER_STOP(timer); 837 RF_ETIMER_EVAL(timer); 838 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 839 if (rf_parityLogDebug) 840 printf("[waking process for region %d]\n", item->regionID); 841 wakeFunc = item->common->wakeFunc; 842 wakeArg = item->common->wakeArg; 843 FreeParityLogCommonData(item->common); 844 FreeParityLogData(item); 845 (wakeFunc) (wakeArg, 0); 846 } else 847 FreeParityLogData(item); 848 } 849 } 850 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 851 if (rf_parityLogDebug) 852 printf("[exiting ParityLogAppend]\n"); 853 return (0); 854} 855 856 857void 858rf_EnableParityLogging(RF_Raid_t * raidPtr) 859{ 860 int regionID; 861 862 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 863 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 864 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 865 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 866 } 867 if (rf_parityLogDebug) 868 printf("[parity logging enabled]\n"); 869} 870#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 871