rf_paritylog.c revision 1.16
1/* $NetBSD: rf_paritylog.c,v 1.16 2011/05/11 03:38:32 mrg Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* Code for manipulating in-core parity logs 30 * 31 */ 32 33#include <sys/cdefs.h> 34__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.16 2011/05/11 03:38:32 mrg Exp $"); 35 36#include "rf_archs.h" 37 38#if RF_INCLUDE_PARITYLOGGING > 0 39 40/* 41 * Append-only log for recording parity "update" and "overwrite" records 42 */ 43 44#include <dev/raidframe/raidframevar.h> 45 46#include "rf_threadstuff.h" 47#include "rf_mcpair.h" 48#include "rf_raid.h" 49#include "rf_dag.h" 50#include "rf_dagfuncs.h" 51#include "rf_desc.h" 52#include "rf_layout.h" 53#include "rf_diskqueue.h" 54#include "rf_etimer.h" 55#include "rf_paritylog.h" 56#include "rf_general.h" 57#include "rf_map.h" 58#include "rf_paritylogging.h" 59#include "rf_paritylogDiskMgr.h" 60 61static RF_CommonLogData_t * 62AllocParityLogCommonData(RF_Raid_t * raidPtr) 63{ 64 RF_CommonLogData_t *common = NULL; 65 66 /* Return a struct for holding common parity log information from the 67 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 68 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 69 70 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 71 if (raidPtr->parityLogDiskQueue.freeCommonList) { 72 common = raidPtr->parityLogDiskQueue.freeCommonList; 73 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 74 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 75 } else { 76 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 77 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 78 /* destroy is in rf_paritylogging.c */ 79 rf_init_mutex2(common->mutex, IPL_VM); 80 } 81 common->next = NULL; 82 return (common); 83} 84 85static void 86FreeParityLogCommonData(RF_CommonLogData_t * common) 87{ 88 RF_Raid_t *raidPtr; 89 90 /* Insert a single struct for holding parity log information (data) 91 * into the free list (rf_parityLogDiskQueue.freeCommonList). 92 * NON-BLOCKING */ 93 94 raidPtr = common->raidPtr; 95 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 96 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 97 raidPtr->parityLogDiskQueue.freeCommonList = common; 98 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 99} 100 101static RF_ParityLogData_t * 102AllocParityLogData(RF_Raid_t * raidPtr) 103{ 104 RF_ParityLogData_t *data = NULL; 105 106 /* Return a struct for holding parity log information from the free 107 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 108 * call RF_Malloc to create a new structure. NON-BLOCKING */ 109 110 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 111 if (raidPtr->parityLogDiskQueue.freeDataList) { 112 data = raidPtr->parityLogDiskQueue.freeDataList; 113 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 114 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 115 } else { 116 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 117 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 118 } 119 data->next = NULL; 120 data->prev = NULL; 121 return (data); 122} 123 124 125static void 126FreeParityLogData(RF_ParityLogData_t * data) 127{ 128 RF_ParityLogData_t *nextItem; 129 RF_Raid_t *raidPtr; 130 131 /* Insert a linked list of structs for holding parity log information 132 * (data) into the free list (parityLogDiskQueue.freeList). 133 * NON-BLOCKING */ 134 135 raidPtr = data->common->raidPtr; 136 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 137 while (data) { 138 nextItem = data->next; 139 data->next = raidPtr->parityLogDiskQueue.freeDataList; 140 raidPtr->parityLogDiskQueue.freeDataList = data; 141 data = nextItem; 142 } 143 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 144} 145 146 147static void 148EnqueueParityLogData( 149 RF_ParityLogData_t * data, 150 RF_ParityLogData_t ** head, 151 RF_ParityLogData_t ** tail) 152{ 153 RF_Raid_t *raidPtr; 154 155 /* Insert an in-core parity log (*data) into the head of a disk queue 156 * (*head, *tail). NON-BLOCKING */ 157 158 raidPtr = data->common->raidPtr; 159 if (rf_parityLogDebug) 160 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 161 RF_ASSERT(data->prev == NULL); 162 RF_ASSERT(data->next == NULL); 163 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 164 if (*head) { 165 /* insert into head of queue */ 166 RF_ASSERT((*head)->prev == NULL); 167 RF_ASSERT((*tail)->next == NULL); 168 data->next = *head; 169 (*head)->prev = data; 170 *head = data; 171 } else { 172 /* insert into empty list */ 173 RF_ASSERT(*head == NULL); 174 RF_ASSERT(*tail == NULL); 175 *head = data; 176 *tail = data; 177 } 178 RF_ASSERT((*head)->prev == NULL); 179 RF_ASSERT((*tail)->next == NULL); 180 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 181} 182 183static RF_ParityLogData_t * 184DequeueParityLogData( 185 RF_Raid_t * raidPtr, 186 RF_ParityLogData_t ** head, 187 RF_ParityLogData_t ** tail, 188 int ignoreLocks) 189{ 190 RF_ParityLogData_t *data; 191 192 /* Remove and return an in-core parity log from the tail of a disk 193 * queue (*head, *tail). NON-BLOCKING */ 194 195 /* remove from tail, preserving FIFO order */ 196 if (!ignoreLocks) 197 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 198 data = *tail; 199 if (data) { 200 if (*head == *tail) { 201 /* removing last item from queue */ 202 *head = NULL; 203 *tail = NULL; 204 } else { 205 *tail = (*tail)->prev; 206 (*tail)->next = NULL; 207 RF_ASSERT((*head)->prev == NULL); 208 RF_ASSERT((*tail)->next == NULL); 209 } 210 data->next = NULL; 211 data->prev = NULL; 212 if (rf_parityLogDebug) 213 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 214 } 215 if (*head) { 216 RF_ASSERT((*head)->prev == NULL); 217 RF_ASSERT((*tail)->next == NULL); 218 } 219 if (!ignoreLocks) 220 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 221 return (data); 222} 223 224 225static void 226RequeueParityLogData( 227 RF_ParityLogData_t * data, 228 RF_ParityLogData_t ** head, 229 RF_ParityLogData_t ** tail) 230{ 231 RF_Raid_t *raidPtr; 232 233 /* Insert an in-core parity log (*data) into the tail of a disk queue 234 * (*head, *tail). NON-BLOCKING */ 235 236 raidPtr = data->common->raidPtr; 237 RF_ASSERT(data); 238 if (rf_parityLogDebug) 239 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 240 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 241 if (*tail) { 242 /* append to tail of list */ 243 data->prev = *tail; 244 data->next = NULL; 245 (*tail)->next = data; 246 *tail = data; 247 } else { 248 /* inserting into an empty list */ 249 *head = data; 250 *tail = data; 251 (*head)->prev = NULL; 252 (*tail)->next = NULL; 253 } 254 RF_ASSERT((*head)->prev == NULL); 255 RF_ASSERT((*tail)->next == NULL); 256 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 257} 258 259RF_ParityLogData_t * 260rf_CreateParityLogData( 261 RF_ParityRecordType_t operation, 262 RF_PhysDiskAddr_t * pda, 263 void *bufPtr, 264 RF_Raid_t * raidPtr, 265 int (*wakeFunc) (RF_DagNode_t * node, int status), 266 void *wakeArg, 267 RF_AccTraceEntry_t * tracerec, 268 RF_Etimer_t startTime) 269{ 270 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 271 RF_CommonLogData_t *common; 272 RF_PhysDiskAddr_t *diskAddress; 273 int boundary, offset = 0; 274 275 /* Return an initialized struct of info to be logged. Build one item 276 * per physical disk address, one item per region. 277 * 278 * NON-BLOCKING */ 279 280 diskAddress = pda; 281 common = AllocParityLogCommonData(raidPtr); 282 RF_ASSERT(common); 283 284 common->operation = operation; 285 common->bufPtr = bufPtr; 286 common->raidPtr = raidPtr; 287 common->wakeFunc = wakeFunc; 288 common->wakeArg = wakeArg; 289 common->tracerec = tracerec; 290 common->startTime = startTime; 291 common->cnt = 0; 292 293 if (rf_parityLogDebug) 294 printf("[entering CreateParityLogData]\n"); 295 while (diskAddress) { 296 common->cnt++; 297 data = AllocParityLogData(raidPtr); 298 RF_ASSERT(data); 299 data->common = common; 300 data->next = NULL; 301 data->prev = NULL; 302 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 303 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 304 /* disk address does not cross a region boundary */ 305 data->diskAddress = *diskAddress; 306 data->bufOffset = offset; 307 offset = offset + diskAddress->numSector; 308 EnqueueParityLogData(data, &resultHead, &resultTail); 309 /* adjust disk address */ 310 diskAddress = diskAddress->next; 311 } else { 312 /* disk address crosses a region boundary */ 313 /* find address where region is crossed */ 314 boundary = 0; 315 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 316 boundary++; 317 318 /* enter data before the boundary */ 319 data->diskAddress = *diskAddress; 320 data->diskAddress.numSector = boundary; 321 data->bufOffset = offset; 322 offset += boundary; 323 EnqueueParityLogData(data, &resultHead, &resultTail); 324 /* adjust disk address */ 325 diskAddress->startSector += boundary; 326 diskAddress->numSector -= boundary; 327 } 328 } 329 if (rf_parityLogDebug) 330 printf("[leaving CreateParityLogData]\n"); 331 return (resultHead); 332} 333 334 335RF_ParityLogData_t * 336rf_SearchAndDequeueParityLogData( 337 RF_Raid_t * raidPtr, 338 int regionID, 339 RF_ParityLogData_t ** head, 340 RF_ParityLogData_t ** tail, 341 int ignoreLocks) 342{ 343 RF_ParityLogData_t *w; 344 345 /* Remove and return an in-core parity log from a specified region 346 * (regionID). If a matching log is not found, return NULL. 347 * 348 * NON-BLOCKING. */ 349 350 /* walk backward through a list, looking for an entry with a matching 351 * region ID */ 352 if (!ignoreLocks) 353 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 354 w = (*tail); 355 while (w) { 356 if (w->regionID == regionID) { 357 /* remove an element from the list */ 358 if (w == *tail) { 359 if (*head == *tail) { 360 /* removing only element in the list */ 361 *head = NULL; 362 *tail = NULL; 363 } else { 364 /* removing last item in the list */ 365 *tail = (*tail)->prev; 366 (*tail)->next = NULL; 367 RF_ASSERT((*head)->prev == NULL); 368 RF_ASSERT((*tail)->next == NULL); 369 } 370 } else { 371 if (w == *head) { 372 /* removing first item in the list */ 373 *head = (*head)->next; 374 (*head)->prev = NULL; 375 RF_ASSERT((*head)->prev == NULL); 376 RF_ASSERT((*tail)->next == NULL); 377 } else { 378 /* removing an item from the middle of 379 * the list */ 380 w->prev->next = w->next; 381 w->next->prev = w->prev; 382 RF_ASSERT((*head)->prev == NULL); 383 RF_ASSERT((*tail)->next == NULL); 384 } 385 } 386 w->prev = NULL; 387 w->next = NULL; 388 if (rf_parityLogDebug) 389 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 390 return (w); 391 } else 392 w = w->prev; 393 } 394 if (!ignoreLocks) 395 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 396 return (NULL); 397} 398 399static RF_ParityLogData_t * 400DequeueMatchingLogData( 401 RF_Raid_t * raidPtr, 402 RF_ParityLogData_t ** head, 403 RF_ParityLogData_t ** tail) 404{ 405 RF_ParityLogData_t *logDataList, *logData; 406 int regionID; 407 408 /* Remove and return an in-core parity log from the tail of a disk 409 * queue (*head, *tail). Then remove all matching (identical 410 * regionIDs) logData and return as a linked list. 411 * 412 * NON-BLOCKING */ 413 414 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 415 if (logDataList) { 416 regionID = logDataList->regionID; 417 logData = logDataList; 418 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 419 while (logData->next) { 420 logData = logData->next; 421 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 422 } 423 } 424 return (logDataList); 425} 426 427 428static RF_ParityLog_t * 429AcquireParityLog( 430 RF_ParityLogData_t * logData, 431 int finish) 432{ 433 RF_ParityLog_t *log = NULL; 434 RF_Raid_t *raidPtr; 435 436 /* Grab a log buffer from the pool and return it. If no buffers are 437 * available, return NULL. NON-BLOCKING */ 438 raidPtr = logData->common->raidPtr; 439 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 440 if (raidPtr->parityLogPool.parityLogs) { 441 log = raidPtr->parityLogPool.parityLogs; 442 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 443 log->regionID = logData->regionID; 444 log->numRecords = 0; 445 log->next = NULL; 446 raidPtr->logsInUse++; 447 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 448 } else { 449 /* no logs available, so place ourselves on the queue of work 450 * waiting on log buffers this is done while 451 * parityLogPool.mutex is held, to ensure synchronization with 452 * ReleaseParityLogs. */ 453 if (rf_parityLogDebug) 454 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 455 if (finish) 456 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 457 else 458 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 459 } 460 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 461 return (log); 462} 463 464void 465rf_ReleaseParityLogs( 466 RF_Raid_t * raidPtr, 467 RF_ParityLog_t * firstLog) 468{ 469 RF_ParityLogData_t *logDataList; 470 RF_ParityLog_t *log, *lastLog; 471 int cnt; 472 473 /* Insert a linked list of parity logs (firstLog) to the free list 474 * (parityLogPool.parityLogPool) 475 * 476 * NON-BLOCKING. */ 477 478 RF_ASSERT(firstLog); 479 480 /* Before returning logs to global free list, service all requests 481 * which are blocked on logs. Holding mutexes for parityLogPool and 482 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 483 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 484 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 485 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 486 log = firstLog; 487 if (firstLog) 488 firstLog = firstLog->next; 489 log->numRecords = 0; 490 log->next = NULL; 491 while (logDataList && log) { 492 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 493 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 494 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 495 if (rf_parityLogDebug) 496 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 497 if (log == NULL) { 498 log = firstLog; 499 if (firstLog) { 500 firstLog = firstLog->next; 501 log->numRecords = 0; 502 log->next = NULL; 503 } 504 } 505 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 506 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 507 if (log) 508 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 509 } 510 /* return remaining logs to pool */ 511 if (log) { 512 log->next = firstLog; 513 firstLog = log; 514 } 515 if (firstLog) { 516 lastLog = firstLog; 517 raidPtr->logsInUse--; 518 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 519 while (lastLog->next) { 520 lastLog = lastLog->next; 521 raidPtr->logsInUse--; 522 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 523 } 524 lastLog->next = raidPtr->parityLogPool.parityLogs; 525 raidPtr->parityLogPool.parityLogs = firstLog; 526 cnt = 0; 527 log = raidPtr->parityLogPool.parityLogs; 528 while (log) { 529 cnt++; 530 log = log->next; 531 } 532 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 533 } 534 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 535 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 536} 537 538static void 539ReintLog( 540 RF_Raid_t * raidPtr, 541 int regionID, 542 RF_ParityLog_t * log) 543{ 544 RF_ASSERT(log); 545 546 /* Insert an in-core parity log (log) into the disk queue of 547 * reintegration work. Set the flag (reintInProgress) for the 548 * specified region (regionID) to indicate that reintegration is in 549 * progress for this region. NON-BLOCKING */ 550 551 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 552 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 553 * complete */ 554 555 if (rf_parityLogDebug) 556 printf("[requesting reintegration of region %d]\n", log->regionID); 557 /* move record to reintegration queue */ 558 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 559 log->next = raidPtr->parityLogDiskQueue.reintQueue; 560 raidPtr->parityLogDiskQueue.reintQueue = log; 561 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 562 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 563 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 564} 565 566static void 567FlushLog( 568 RF_Raid_t * raidPtr, 569 RF_ParityLog_t * log) 570{ 571 /* insert a core log (log) into a list of logs 572 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 573 * NON-BLOCKING */ 574 575 RF_ASSERT(log); 576 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 577 RF_ASSERT(log->next == NULL); 578 /* move log to flush queue */ 579 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 580 log->next = raidPtr->parityLogDiskQueue.flushQueue; 581 raidPtr->parityLogDiskQueue.flushQueue = log; 582 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 583 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 584} 585 586static int 587DumpParityLogToDisk( 588 int finish, 589 RF_ParityLogData_t * logData) 590{ 591 int i, diskCount, regionID = logData->regionID; 592 RF_ParityLog_t *log; 593 RF_Raid_t *raidPtr; 594 595 raidPtr = logData->common->raidPtr; 596 597 /* Move a core log to disk. If the log disk is full, initiate 598 * reintegration. 599 * 600 * Return (0) if we can enqueue the dump immediately, otherwise return 601 * (1) to indicate we are blocked on reintegration and control of the 602 * thread should be relinquished. 603 * 604 * Caller must hold regionInfo[regionID].mutex 605 * 606 * NON-BLOCKING */ 607 608 if (rf_parityLogDebug) 609 printf("[dumping parity log to disk, region %d]\n", regionID); 610 log = raidPtr->regionInfo[regionID].coreLog; 611 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 612 RF_ASSERT(log->next == NULL); 613 614 /* if reintegration is in progress, must queue work */ 615 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 616 if (raidPtr->regionInfo[regionID].reintInProgress) { 617 /* Can not proceed since this region is currently being 618 * reintegrated. We can not block, so queue remaining work and 619 * return */ 620 if (rf_parityLogDebug) 621 printf("[region %d waiting on reintegration]\n", regionID); 622 /* XXX not sure about the use of finish - shouldn't this 623 * always be "Enqueue"? */ 624 if (finish) 625 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 626 else 627 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 628 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 629 return (1); /* relenquish control of this thread */ 630 } 631 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 632 raidPtr->regionInfo[regionID].coreLog = NULL; 633 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 634 /* IMPORTANT!! this loop bound assumes region disk holds an 635 * integral number of core logs */ 636 { 637 /* update disk map for this region */ 638 diskCount = raidPtr->regionInfo[regionID].diskCount; 639 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 640 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 641 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 642 } 643 log->diskOffset = diskCount; 644 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 645 FlushLog(raidPtr, log); 646 } else { 647 /* no room for log on disk, send it to disk manager and 648 * request reintegration */ 649 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 650 ReintLog(raidPtr, regionID, log); 651 } 652 if (rf_parityLogDebug) 653 printf("[finished dumping parity log to disk, region %d]\n", regionID); 654 return (0); 655} 656 657int 658rf_ParityLogAppend( 659 RF_ParityLogData_t * logData, 660 int finish, 661 RF_ParityLog_t ** incomingLog, 662 int clearReintFlag) 663{ 664 int regionID, logItem, itemDone; 665 RF_ParityLogData_t *item; 666 int punt, done = RF_FALSE; 667 RF_ParityLog_t *log; 668 RF_Raid_t *raidPtr; 669 RF_Etimer_t timer; 670 int (*wakeFunc) (RF_DagNode_t * node, int status); 671 void *wakeArg; 672 673 /* Add parity to the appropriate log, one sector at a time. This 674 * routine is called is called by dag functions ParityLogUpdateFunc 675 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 676 * 677 * Parity to be logged is contained in a linked-list (logData). When 678 * this routine returns, every sector in the list will be in one of 679 * three places: 1) entered into the parity log 2) queued, waiting on 680 * reintegration 3) queued, waiting on a core log 681 * 682 * Blocked work is passed to the ParityLoggingDiskManager for completion. 683 * Later, as conditions which required the block are removed, the work 684 * reenters this routine with the "finish" parameter set to "RF_TRUE." 685 * 686 * NON-BLOCKING */ 687 688 raidPtr = logData->common->raidPtr; 689 /* lock the region for the first item in logData */ 690 RF_ASSERT(logData != NULL); 691 regionID = logData->regionID; 692 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 693 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 694 695 if (clearReintFlag) { 696 /* Enable flushing for this region. Holding both locks 697 * provides a synchronization barrier with DumpParityLogToDisk */ 698 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 699 /* XXXmrg need this? */ 700 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 701 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 702 raidPtr->regionInfo[regionID].diskCount = 0; 703 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 704 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 705 * enabled */ 706 /* XXXmrg need this? */ 707 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 708 } 709 /* process each item in logData */ 710 while (logData) { 711 /* remove an item from logData */ 712 item = logData; 713 logData = logData->next; 714 item->next = NULL; 715 item->prev = NULL; 716 717 if (rf_parityLogDebug) 718 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 719 720 /* see if we moved to a new region */ 721 if (regionID != item->regionID) { 722 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 723 regionID = item->regionID; 724 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 725 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 726 } 727 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 728 * can happen in one of two ways: 1) no core 729 * log (AcquireParityLog) 2) waiting on 730 * reintegration (DumpParityLogToDisk) If punt 731 * is RF_TRUE, the dataItem was queued, so 732 * skip to next item. */ 733 734 /* process item, one sector at a time, until all sectors 735 * processed or we punt */ 736 if (item->diskAddress.numSector > 0) 737 done = RF_FALSE; 738 else 739 RF_ASSERT(0); 740 while (!punt && !done) { 741 /* verify that a core log exists for this region */ 742 if (!raidPtr->regionInfo[regionID].coreLog) { 743 /* Attempt to acquire a parity log. If 744 * acquisition fails, queue remaining work in 745 * data item and move to nextItem. */ 746 if (incomingLog) 747 if (*incomingLog) { 748 RF_ASSERT((*incomingLog)->next == NULL); 749 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 750 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 751 *incomingLog = NULL; 752 } else 753 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 754 else 755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 756 /* Note: AcquireParityLog either returns a log 757 * or enqueues currentItem */ 758 } 759 if (!raidPtr->regionInfo[regionID].coreLog) 760 punt = RF_TRUE; /* failed to find a core log */ 761 else { 762 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 763 /* verify that the log has room for new 764 * entries */ 765 /* if log is full, dump it to disk and grab a 766 * new log */ 767 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 768 /* log is full, dump it to disk */ 769 if (DumpParityLogToDisk(finish, item)) 770 punt = RF_TRUE; /* dump unsuccessful, 771 * blocked on 772 * reintegration */ 773 else { 774 /* dump was successful */ 775 if (incomingLog) 776 if (*incomingLog) { 777 RF_ASSERT((*incomingLog)->next == NULL); 778 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 779 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 780 *incomingLog = NULL; 781 } else 782 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 783 else 784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 785 /* if a core log is not 786 * available, must queue work 787 * and return */ 788 if (!raidPtr->regionInfo[regionID].coreLog) 789 punt = RF_TRUE; /* blocked on log 790 * availability */ 791 } 792 } 793 } 794 /* if we didn't punt on this item, attempt to add a 795 * sector to the core log */ 796 if (!punt) { 797 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 798 /* at this point, we have a core log with 799 * enough room for a sector */ 800 /* copy a sector into the log */ 801 log = raidPtr->regionInfo[regionID].coreLog; 802 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 803 logItem = log->numRecords++; 804 log->records[logItem].parityAddr = item->diskAddress; 805 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 806 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 807 log->records[logItem].parityAddr.numSector = 1; 808 log->records[logItem].operation = item->common->operation; 809 memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector)); 810 item->diskAddress.numSector--; 811 item->diskAddress.startSector++; 812 if (item->diskAddress.numSector == 0) 813 done = RF_TRUE; 814 } 815 } 816 817 if (!punt) { 818 /* Processed this item completely, decrement count of 819 * items to be processed. */ 820 RF_ASSERT(item->diskAddress.numSector == 0); 821 rf_lock_mutex2(item->common->mutex); 822 item->common->cnt--; 823 if (item->common->cnt == 0) 824 itemDone = RF_TRUE; 825 else 826 itemDone = RF_FALSE; 827 rf_unlock_mutex2(item->common->mutex); 828 if (itemDone) { 829 /* Finished processing all log data for this 830 * IO Return structs to free list and invoke 831 * wakeup function. */ 832 timer = item->common->startTime; /* grab initial value of 833 * timer */ 834 RF_ETIMER_STOP(timer); 835 RF_ETIMER_EVAL(timer); 836 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 837 if (rf_parityLogDebug) 838 printf("[waking process for region %d]\n", item->regionID); 839 wakeFunc = item->common->wakeFunc; 840 wakeArg = item->common->wakeArg; 841 FreeParityLogCommonData(item->common); 842 FreeParityLogData(item); 843 (wakeFunc) (wakeArg, 0); 844 } else 845 FreeParityLogData(item); 846 } 847 } 848 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 849 if (rf_parityLogDebug) 850 printf("[exiting ParityLogAppend]\n"); 851 return (0); 852} 853 854 855void 856rf_EnableParityLogging(RF_Raid_t * raidPtr) 857{ 858 int regionID; 859 860 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 861 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 862 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 863 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 864 } 865 if (rf_parityLogDebug) 866 printf("[parity logging enabled]\n"); 867} 868#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 869