/* rf_paritylog.c revision 1.1 */
1/* $NetBSD: rf_paritylog.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* Code for manipulating in-core parity logs 30 * 31 * : 32 * Log: rf_paritylog.c,v 33 * Revision 1.27 1996/07/28 20:31:39 jimz 34 * i386netbsd port 35 * true/false fixup 36 * 37 * Revision 1.26 1996/07/27 23:36:08 jimz 38 * Solaris port of simulator 39 * 40 * Revision 1.25 1996/07/17 21:00:58 jimz 41 * clean up timer interface, tracing 42 * 43 * Revision 1.24 1996/06/11 10:18:59 jimz 44 * AllocParityLogCommonData() was freeing the common pointer immediately 45 * after allocating this. It appeared that this free really belonged 46 * inside one of the failure cases (for backing out), so I moved it 47 * in there. 48 * 49 * Revision 1.23 1996/06/05 18:06:02 jimz 50 * Major code cleanup. The Great Renaming is now done. 51 * Better modularity. Better typing. Fixed a bunch of 52 * synchronization bugs. 
Made a lot of global stuff 53 * per-desc or per-array. Removed dead code. 54 * 55 * Revision 1.22 1996/06/02 17:31:48 jimz 56 * Moved a lot of global stuff into array structure, where it belongs. 57 * Fixed up paritylogging, pss modules in this manner. Some general 58 * code cleanup. Removed lots of dead code, some dead files. 59 * 60 * Revision 1.21 1996/05/31 22:26:54 jimz 61 * fix a lot of mapping problems, memory allocation problems 62 * found some weird lock issues, fixed 'em 63 * more code cleanup 64 * 65 * Revision 1.20 1996/05/30 23:22:16 jimz 66 * bugfixes of serialization, timing problems 67 * more cleanup 68 * 69 * Revision 1.19 1996/05/30 12:59:18 jimz 70 * make etimer happier, more portable 71 * 72 * Revision 1.18 1996/05/27 18:56:37 jimz 73 * more code cleanup 74 * better typing 75 * compiles in all 3 environments 76 * 77 * Revision 1.17 1996/05/24 04:28:55 jimz 78 * release cleanup ckpt 79 * 80 * Revision 1.16 1996/05/23 21:46:35 jimz 81 * checkpoint in code cleanup (release prep) 82 * lots of types, function names have been fixed 83 * 84 * Revision 1.15 1996/05/23 00:33:23 jimz 85 * code cleanup: move all debug decls to rf_options.c, all extern 86 * debug decls to rf_options.h, all debug vars preceded by rf_ 87 * 88 * Revision 1.14 1996/05/20 16:16:59 jimz 89 * switch to rf_{mutex,cond}_{init,destroy} 90 * 91 * Revision 1.13 1996/05/18 19:51:34 jimz 92 * major code cleanup- fix syntax, make some types consistent, 93 * add prototypes, clean out dead code, et cetera 94 * 95 * Revision 1.12 1995/12/12 18:10:06 jimz 96 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT 97 * fix 80-column brain damage in comments 98 * 99 * Revision 1.11 1995/12/06 20:54:44 wvcii 100 * added prototyping 101 * 102 * Revision 1.10 1995/11/30 16:05:37 wvcii 103 * added copyright info 104 * 105 * Revision 1.9 1995/10/08 20:41:28 wvcii 106 * fixed bug in allocation of CommonLogData (was allocating incorrect size) 107 * 108 * Revision 1.8 1995/09/07 15:52:12 jimz 109 * noop 
compile when INCLUDE_PARITYLOGGING not defined 110 * 111 * Revision 1.7 1995/09/06 19:17:36 wvcii 112 * moved code for reintegration to rf_paritylogDiskMgr.c 113 * 114 * Revision 1.6 95/07/07 00:16:06 wvcii 115 * this version free from deadlock, fails parity verification 116 * 117 * Revision 1.5 1995/06/09 13:14:24 wvcii 118 * code is now nonblocking 119 * 120 * Revision 1.4 95/06/01 17:01:59 wvcii 121 * code debug 122 * 123 * Revision 1.3 95/05/31 13:08:23 wvcii 124 * code debug 125 * 126 * Revision 1.2 95/05/21 15:42:15 wvcii 127 * code debug 128 * 129 * Revision 1.1 95/05/18 10:43:54 wvcii 130 * Initial revision 131 * 132 */ 133 134#include "rf_archs.h" 135 136#if RF_INCLUDE_PARITYLOGGING > 0 137 138/* 139 * Append-only log for recording parity "update" and "overwrite" records 140 */ 141 142#include "rf_types.h" 143#include "rf_threadstuff.h" 144#include "rf_mcpair.h" 145#include "rf_raid.h" 146#include "rf_dag.h" 147#include "rf_dagfuncs.h" 148#include "rf_desc.h" 149#include "rf_layout.h" 150#include "rf_diskqueue.h" 151#include "rf_etimer.h" 152#include "rf_paritylog.h" 153#include "rf_general.h" 154#include "rf_threadid.h" 155#include "rf_map.h" 156#include "rf_paritylogging.h" 157#include "rf_paritylogDiskMgr.h" 158#include "rf_sys.h" 159 160static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr) 161{ 162 RF_CommonLogData_t *common = NULL; 163 int rc; 164 165 /* Return a struct for holding common parity log information from the free 166 list (rf_parityLogDiskQueue.freeCommonList). If the free list is empty, call 167 RF_Malloc to create a new structure. 
168 NON-BLOCKING */ 169 170 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 171 if (raidPtr->parityLogDiskQueue.freeCommonList) 172 { 173 common = raidPtr->parityLogDiskQueue.freeCommonList; 174 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 175 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 176 } 177 else 178 { 179 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 180 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 181 rc = rf_mutex_init(&common->mutex); 182 if (rc) { 183 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 184 __LINE__, rc); 185 RF_Free(common, sizeof(RF_CommonLogData_t)); 186 common = NULL; 187 } 188 } 189 common->next = NULL; 190 return(common); 191} 192 193static void FreeParityLogCommonData(RF_CommonLogData_t *common) 194{ 195 RF_Raid_t *raidPtr; 196 197 /* Insert a single struct for holding parity log information 198 (data) into the free list (rf_parityLogDiskQueue.freeCommonList). 199 NON-BLOCKING */ 200 201 raidPtr = common->raidPtr; 202 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 203 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 204 raidPtr->parityLogDiskQueue.freeCommonList = common; 205 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 206} 207 208static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr) 209{ 210 RF_ParityLogData_t *data = NULL; 211 212 /* Return a struct for holding parity log information from the free 213 list (rf_parityLogDiskQueue.freeList). If the free list is empty, call 214 RF_Malloc to create a new structure. 
215 NON-BLOCKING */ 216 217 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 218 if (raidPtr->parityLogDiskQueue.freeDataList) 219 { 220 data = raidPtr->parityLogDiskQueue.freeDataList; 221 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 222 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 223 } 224 else 225 { 226 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 227 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 228 } 229 data->next = NULL; 230 data->prev = NULL; 231 return(data); 232} 233 234 235static void FreeParityLogData(RF_ParityLogData_t *data) 236{ 237 RF_ParityLogData_t *nextItem; 238 RF_Raid_t *raidPtr; 239 240 /* Insert a linked list of structs for holding parity log 241 information (data) into the free list (parityLogDiskQueue.freeList). 242 NON-BLOCKING */ 243 244 raidPtr = data->common->raidPtr; 245 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 246 while (data) 247 { 248 nextItem = data->next; 249 data->next = raidPtr->parityLogDiskQueue.freeDataList; 250 raidPtr->parityLogDiskQueue.freeDataList = data; 251 data = nextItem; 252 } 253 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 254} 255 256 257static void EnqueueParityLogData( 258 RF_ParityLogData_t *data, 259 RF_ParityLogData_t **head, 260 RF_ParityLogData_t **tail) 261{ 262 RF_Raid_t *raidPtr; 263 264 /* Insert an in-core parity log (*data) into the head of 265 a disk queue (*head, *tail). 
266 NON-BLOCKING */ 267 268 raidPtr = data->common->raidPtr; 269 if (rf_parityLogDebug) 270 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector); 271 RF_ASSERT(data->prev == NULL); 272 RF_ASSERT(data->next == NULL); 273 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 274 if (*head) 275 { 276 /* insert into head of queue */ 277 RF_ASSERT((*head)->prev == NULL); 278 RF_ASSERT((*tail)->next == NULL); 279 data->next = *head; 280 (*head)->prev = data; 281 *head = data; 282 } 283 else 284 { 285 /* insert into empty list */ 286 RF_ASSERT(*head == NULL); 287 RF_ASSERT(*tail == NULL); 288 *head = data; 289 *tail = data; 290 } 291 RF_ASSERT((*head)->prev == NULL); 292 RF_ASSERT((*tail)->next == NULL); 293 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 294} 295 296static RF_ParityLogData_t *DequeueParityLogData( 297 RF_Raid_t *raidPtr, 298 RF_ParityLogData_t **head, 299 RF_ParityLogData_t **tail, 300 int ignoreLocks) 301{ 302 RF_ParityLogData_t *data; 303 304 /* Remove and return an in-core parity log from the tail of 305 a disk queue (*head, *tail). 
306 NON-BLOCKING */ 307 308 /* remove from tail, preserving FIFO order */ 309 if (!ignoreLocks) 310 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 311 data = *tail; 312 if (data) 313 { 314 if (*head == *tail) 315 { 316 /* removing last item from queue */ 317 *head = NULL; 318 *tail = NULL; 319 } 320 else 321 { 322 *tail = (*tail)->prev; 323 (*tail)->next = NULL; 324 RF_ASSERT((*head)->prev == NULL); 325 RF_ASSERT((*tail)->next == NULL); 326 } 327 data->next = NULL; 328 data->prev = NULL; 329 if (rf_parityLogDebug) 330 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector); 331 } 332 if (*head) 333 { 334 RF_ASSERT((*head)->prev == NULL); 335 RF_ASSERT((*tail)->next == NULL); 336 } 337 if (!ignoreLocks) 338 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 339 return(data); 340} 341 342 343static void RequeueParityLogData( 344 RF_ParityLogData_t *data, 345 RF_ParityLogData_t **head, 346 RF_ParityLogData_t **tail) 347{ 348 RF_Raid_t *raidPtr; 349 350 /* Insert an in-core parity log (*data) into the tail of 351 a disk queue (*head, *tail). 
352 NON-BLOCKING */ 353 354 raidPtr = data->common->raidPtr; 355 RF_ASSERT(data); 356 if (rf_parityLogDebug) 357 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 358 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 359 if (*tail) 360 { 361 /* append to tail of list */ 362 data->prev = *tail; 363 data->next = NULL; 364 (*tail)->next = data; 365 *tail = data; 366 } 367 else 368 { 369 /* inserting into an empty list */ 370 *head = data; 371 *tail = data; 372 (*head)->prev = NULL; 373 (*tail)->next = NULL; 374 } 375 RF_ASSERT((*head)->prev == NULL); 376 RF_ASSERT((*tail)->next == NULL); 377 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 378} 379 380RF_ParityLogData_t *rf_CreateParityLogData( 381 RF_ParityRecordType_t operation, 382 RF_PhysDiskAddr_t *pda, 383 caddr_t bufPtr, 384 RF_Raid_t *raidPtr, 385 int (*wakeFunc)(RF_DagNode_t *node, int status), 386 void *wakeArg, 387 RF_AccTraceEntry_t *tracerec, 388 RF_Etimer_t startTime) 389{ 390 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 391 RF_CommonLogData_t *common; 392 RF_PhysDiskAddr_t *diskAddress; 393 int boundary, offset = 0; 394 395 /* Return an initialized struct of info to be logged. 396 Build one item per physical disk address, one item per region. 
397 398 NON-BLOCKING */ 399 400 diskAddress = pda; 401 common = AllocParityLogCommonData(raidPtr); 402 RF_ASSERT(common); 403 404 common->operation = operation; 405 common->bufPtr = bufPtr; 406 common->raidPtr = raidPtr; 407 common->wakeFunc = wakeFunc; 408 common->wakeArg = wakeArg; 409 common->tracerec = tracerec; 410 common->startTime = startTime; 411 common->cnt = 0; 412 413 if (rf_parityLogDebug) 414 printf("[entering CreateParityLogData]\n"); 415 while (diskAddress) 416 { 417 common->cnt++; 418 data = AllocParityLogData(raidPtr); 419 RF_ASSERT(data); 420 data->common = common; 421 data->next = NULL; 422 data->prev = NULL; 423 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 424 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) 425 { 426 /* disk address does not cross a region boundary */ 427 data->diskAddress = *diskAddress; 428 data->bufOffset = offset; 429 offset = offset + diskAddress->numSector; 430 EnqueueParityLogData(data, &resultHead, &resultTail); 431 /* adjust disk address */ 432 diskAddress = diskAddress->next; 433 } 434 else 435 { 436 /* disk address crosses a region boundary */ 437 /* find address where region is crossed */ 438 boundary = 0; 439 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 440 boundary++; 441 442 /* enter data before the boundary */ 443 data->diskAddress = *diskAddress; 444 data->diskAddress.numSector = boundary; 445 data->bufOffset = offset; 446 offset += boundary; 447 EnqueueParityLogData(data, &resultHead, &resultTail); 448 /* adjust disk address */ 449 diskAddress->startSector += boundary; 450 diskAddress->numSector -= boundary; 451 } 452 } 453 if (rf_parityLogDebug) 454 printf("[leaving CreateParityLogData]\n"); 455 return(resultHead); 456} 457 458 459RF_ParityLogData_t *rf_SearchAndDequeueParityLogData( 460 RF_Raid_t *raidPtr, 461 int regionID, 462 RF_ParityLogData_t **head, 
463 RF_ParityLogData_t **tail, 464 int ignoreLocks) 465{ 466 RF_ParityLogData_t *w; 467 468 /* Remove and return an in-core parity log from a specified region (regionID). 469 If a matching log is not found, return NULL. 470 471 NON-BLOCKING. 472 */ 473 474 /* walk backward through a list, looking for an entry with a matching region ID */ 475 if (!ignoreLocks) 476 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 477 w = (*tail); 478 while (w) 479 { 480 if (w->regionID == regionID) 481 { 482 /* remove an element from the list */ 483 if (w == *tail) 484 { 485 if (*head == *tail) 486 { 487 /* removing only element in the list */ 488 *head = NULL; 489 *tail = NULL; 490 } 491 else 492 { 493 /* removing last item in the list */ 494 *tail = (*tail)->prev; 495 (*tail)->next = NULL; 496 RF_ASSERT((*head)->prev == NULL); 497 RF_ASSERT((*tail)->next == NULL); 498 } 499 } 500 else 501 { 502 if (w == *head) 503 { 504 /* removing first item in the list */ 505 *head = (*head)->next; 506 (*head)->prev = NULL; 507 RF_ASSERT((*head)->prev == NULL); 508 RF_ASSERT((*tail)->next == NULL); 509 } 510 else 511 { 512 /* removing an item from the middle of the list */ 513 w->prev->next = w->next; 514 w->next->prev = w->prev; 515 RF_ASSERT((*head)->prev == NULL); 516 RF_ASSERT((*tail)->next == NULL); 517 } 518 } 519 w->prev = NULL; 520 w->next = NULL; 521 if (rf_parityLogDebug) 522 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector); 523 return(w); 524 } 525 else 526 w = w->prev; 527 } 528 if (!ignoreLocks) 529 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 530 return(NULL); 531} 532 533static RF_ParityLogData_t *DequeueMatchingLogData( 534 RF_Raid_t *raidPtr, 535 RF_ParityLogData_t **head, 536 RF_ParityLogData_t **tail) 537{ 538 RF_ParityLogData_t *logDataList, *logData; 539 int regionID; 540 541 /* Remove and return an in-core parity log from the tail of 542 a disk queue (*head, 
*tail). Then remove all matching 543 (identical regionIDs) logData and return as a linked list. 544 545 NON-BLOCKING 546 */ 547 548 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 549 if (logDataList) 550 { 551 regionID = logDataList->regionID; 552 logData = logDataList; 553 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 554 while (logData->next) 555 { 556 logData = logData->next; 557 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 558 } 559 } 560 return(logDataList); 561} 562 563 564static RF_ParityLog_t *AcquireParityLog( 565 RF_ParityLogData_t *logData, 566 int finish) 567{ 568 RF_ParityLog_t *log = NULL; 569 RF_Raid_t *raidPtr; 570 571 /* Grab a log buffer from the pool and return it. 572 If no buffers are available, return NULL. 573 NON-BLOCKING 574 */ 575 raidPtr = logData->common->raidPtr; 576 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 577 if (raidPtr->parityLogPool.parityLogs) 578 { 579 log = raidPtr->parityLogPool.parityLogs; 580 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 581 log->regionID = logData->regionID; 582 log->numRecords = 0; 583 log->next = NULL; 584 raidPtr->logsInUse++; 585 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 586 } 587 else 588 { 589 /* no logs available, so place ourselves on the queue of work waiting on log buffers 590 this is done while parityLogPool.mutex is held, to ensure synchronization 591 with ReleaseParityLogs. 
592 */ 593 if (rf_parityLogDebug) 594 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 595 if (finish) 596 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 597 else 598 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 599 } 600 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 601 return(log); 602} 603 604void rf_ReleaseParityLogs( 605 RF_Raid_t *raidPtr, 606 RF_ParityLog_t *firstLog) 607{ 608 RF_ParityLogData_t *logDataList; 609 RF_ParityLog_t *log, *lastLog; 610 int cnt; 611 612 /* Insert a linked list of parity logs (firstLog) to 613 the free list (parityLogPool.parityLogPool) 614 615 NON-BLOCKING. 616 */ 617 618 RF_ASSERT(firstLog); 619 620 /* Before returning logs to global free list, service all 621 requests which are blocked on logs. Holding mutexes for parityLogPool and parityLogDiskQueue 622 forces synchronization with AcquireParityLog(). 
623 */ 624 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 625 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 626 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 627 log = firstLog; 628 if (firstLog) 629 firstLog = firstLog->next; 630 log->numRecords = 0; 631 log->next = NULL; 632 while (logDataList && log) 633 { 634 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 635 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 636 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 637 if (rf_parityLogDebug) 638 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 639 if (log == NULL) 640 { 641 log = firstLog; 642 if (firstLog) 643 { 644 firstLog = firstLog->next; 645 log->numRecords = 0; 646 log->next = NULL; 647 } 648 } 649 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 650 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 651 if (log) 652 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 653 } 654 /* return remaining logs to pool */ 655 if (log) 656 { 657 log->next = firstLog; 658 firstLog = log; 659 } 660 if (firstLog) 661 { 662 lastLog = firstLog; 663 raidPtr->logsInUse--; 664 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 665 while (lastLog->next) 666 { 667 lastLog = lastLog->next; 668 raidPtr->logsInUse--; 669 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 670 } 671 lastLog->next = raidPtr->parityLogPool.parityLogs; 672 raidPtr->parityLogPool.parityLogs = firstLog; 673 cnt = 0; 674 log = raidPtr->parityLogPool.parityLogs; 675 while (log) 676 { 677 cnt++; 678 log = log->next; 679 } 680 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 681 } 682 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 683 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 684} 685 686static void ReintLog( 687 
RF_Raid_t *raidPtr, 688 int regionID, 689 RF_ParityLog_t *log) 690{ 691 RF_ASSERT(log); 692 693 /* Insert an in-core parity log (log) into the disk queue of reintegration 694 work. Set the flag (reintInProgress) for the specified region (regionID) 695 to indicate that reintegration is in progress for this region. 696 NON-BLOCKING 697 */ 698 699 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 700 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint complete */ 701 702 if (rf_parityLogDebug) 703 printf("[requesting reintegration of region %d]\n", log->regionID); 704 /* move record to reintegration queue */ 705 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 706 log->next = raidPtr->parityLogDiskQueue.reintQueue; 707 raidPtr->parityLogDiskQueue.reintQueue = log; 708 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 709 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 710 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 711} 712 713static void FlushLog( 714 RF_Raid_t *raidPtr, 715 RF_ParityLog_t *log) 716{ 717 /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue) 718 waiting to be written to disk. 719 NON-BLOCKING 720 */ 721 722 RF_ASSERT(log); 723 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 724 RF_ASSERT(log->next == NULL); 725 /* move log to flush queue */ 726 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 727 log->next = raidPtr->parityLogDiskQueue.flushQueue; 728 raidPtr->parityLogDiskQueue.flushQueue = log; 729 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 730 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 731} 732 733static int DumpParityLogToDisk( 734 int finish, 735 RF_ParityLogData_t *logData) 736{ 737 int i, diskCount, regionID = logData->regionID; 738 RF_ParityLog_t *log; 739 RF_Raid_t *raidPtr; 740 741 raidPtr = logData->common->raidPtr; 742 743 /* Move a core log to disk. If the log disk is full, initiate 744 reintegration. 
745 746 Return (0) if we can enqueue the dump immediately, otherwise 747 return (1) to indicate we are blocked on reintegration and 748 control of the thread should be relinquished. 749 750 Caller must hold regionInfo[regionID].mutex 751 752 NON-BLOCKING 753 */ 754 755 if (rf_parityLogDebug) 756 printf("[dumping parity log to disk, region %d]\n", regionID); 757 log = raidPtr->regionInfo[regionID].coreLog; 758 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 759 RF_ASSERT(log->next == NULL); 760 761 /* if reintegration is in progress, must queue work */ 762 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 763 if (raidPtr->regionInfo[regionID].reintInProgress) 764 { 765 /* Can not proceed since this region is currently being reintegrated. 766 We can not block, so queue remaining work and return */ 767 if (rf_parityLogDebug) 768 printf("[region %d waiting on reintegration]\n",regionID); 769 /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */ 770 if (finish) 771 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 772 else 773 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 774 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 775 return(1); /* relenquish control of this thread */ 776 } 777 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 778 raidPtr->regionInfo[regionID].coreLog = NULL; 779 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 780 /* IMPORTANT!! 
this loop bound assumes region disk holds an integral number of core logs */ 781 { 782 /* update disk map for this region */ 783 diskCount = raidPtr->regionInfo[regionID].diskCount; 784 for (i = 0; i < raidPtr->numSectorsPerLog; i++) 785 { 786 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 787 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 788 } 789 log->diskOffset = diskCount; 790 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 791 FlushLog(raidPtr, log); 792 } 793 else 794 { 795 /* no room for log on disk, send it to disk manager and request reintegration */ 796 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 797 ReintLog(raidPtr, regionID, log); 798 } 799 if (rf_parityLogDebug) 800 printf("[finished dumping parity log to disk, region %d]\n", regionID); 801 return(0); 802} 803 804int rf_ParityLogAppend( 805 RF_ParityLogData_t *logData, 806 int finish, 807 RF_ParityLog_t **incomingLog, 808 int clearReintFlag) 809{ 810 int regionID, logItem, itemDone; 811 RF_ParityLogData_t *item; 812 int punt, done = RF_FALSE; 813 RF_ParityLog_t *log; 814 RF_Raid_t *raidPtr; 815 RF_Etimer_t timer; 816 int (*wakeFunc)(RF_DagNode_t *node, int status); 817 void *wakeArg; 818 819 /* Add parity to the appropriate log, one sector at a time. 820 This routine is called is called by dag functions ParityLogUpdateFunc 821 and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 822 823 Parity to be logged is contained in a linked-list (logData). When 824 this routine returns, every sector in the list will be in one of 825 three places: 826 1) entered into the parity log 827 2) queued, waiting on reintegration 828 3) queued, waiting on a core log 829 830 Blocked work is passed to the ParityLoggingDiskManager for completion. 
831 Later, as conditions which required the block are removed, the work 832 reenters this routine with the "finish" parameter set to "RF_TRUE." 833 834 NON-BLOCKING 835 */ 836 837 raidPtr = logData->common->raidPtr; 838 /* lock the region for the first item in logData */ 839 RF_ASSERT(logData != NULL); 840 regionID = logData->regionID; 841 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 842 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 843 844 if (clearReintFlag) 845 { 846 /* Enable flushing for this region. Holding both locks provides 847 a synchronization barrier with DumpParityLogToDisk 848 */ 849 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 850 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 851 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 852 raidPtr->regionInfo[regionID].diskCount = 0; 853 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 854 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */ 855 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 856 } 857 858 /* process each item in logData */ 859 while (logData) 860 { 861 /* remove an item from logData */ 862 item = logData; 863 logData = logData->next; 864 item->next = NULL; 865 item->prev = NULL; 866 867 if (rf_parityLogDebug) 868 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector); 869 870 /* see if we moved to a new region */ 871 if (regionID != item->regionID) 872 { 873 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 874 regionID = item->regionID; 875 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 876 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 877 } 878 879 punt = RF_FALSE; /* Set to RF_TRUE if work is blocked. 
This can happen in one of two ways: 880 1) no core log (AcquireParityLog) 881 2) waiting on reintegration (DumpParityLogToDisk) 882 If punt is RF_TRUE, the dataItem was queued, so skip to next item. 883 */ 884 885 /* process item, one sector at a time, until all sectors processed or we punt */ 886 if (item->diskAddress.numSector > 0) 887 done = RF_FALSE; 888 else 889 RF_ASSERT(0); 890 while (!punt && !done) 891 { 892 /* verify that a core log exists for this region */ 893 if (!raidPtr->regionInfo[regionID].coreLog) 894 { 895 /* Attempt to acquire a parity log. 896 If acquisition fails, queue remaining work in data item and move to nextItem. 897 */ 898 if (incomingLog) 899 if (*incomingLog) 900 { 901 RF_ASSERT((*incomingLog)->next == NULL); 902 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 903 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 904 *incomingLog = NULL; 905 } 906 else 907 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 908 else 909 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 910 /* Note: AcquireParityLog either returns a log or enqueues currentItem */ 911 } 912 if (!raidPtr->regionInfo[regionID].coreLog) 913 punt = RF_TRUE; /* failed to find a core log */ 914 else 915 { 916 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 917 /* verify that the log has room for new entries */ 918 /* if log is full, dump it to disk and grab a new log */ 919 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) 920 { 921 /* log is full, dump it to disk */ 922 if (DumpParityLogToDisk(finish, item)) 923 punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */ 924 else 925 { 926 /* dump was successful */ 927 if (incomingLog) 928 if (*incomingLog) 929 { 930 RF_ASSERT((*incomingLog)->next == NULL); 931 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 932 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 933 *incomingLog = NULL; 934 } 935 
else 936 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 937 else 938 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 939 /* if a core log is not available, must queue work and return */ 940 if (!raidPtr->regionInfo[regionID].coreLog) 941 punt = RF_TRUE; /* blocked on log availability */ 942 } 943 } 944 } 945 /* if we didn't punt on this item, attempt to add a sector to the core log */ 946 if (!punt) 947 { 948 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 949 /* at this point, we have a core log with enough room for a sector */ 950 /* copy a sector into the log */ 951 log = raidPtr->regionInfo[regionID].coreLog; 952 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 953 logItem = log->numRecords++; 954 log->records[logItem].parityAddr = item->diskAddress; 955 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 956 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 957 log->records[logItem].parityAddr.numSector = 1; 958 log->records[logItem].operation = item->common->operation; 959 bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector)); 960 item->diskAddress.numSector--; 961 item->diskAddress.startSector++; 962 if (item->diskAddress.numSector == 0) 963 done = RF_TRUE; 964 } 965 } 966 967 if (!punt) 968 { 969 /* Processed this item completely, decrement count of items 970 to be processed. 
971 */ 972 RF_ASSERT(item->diskAddress.numSector == 0); 973 RF_LOCK_MUTEX(item->common->mutex); 974 item->common->cnt--; 975 if (item->common->cnt == 0) 976 itemDone = RF_TRUE; 977 else 978 itemDone = RF_FALSE; 979 RF_UNLOCK_MUTEX(item->common->mutex); 980 if (itemDone) 981 { 982 /* Finished processing all log data for this IO 983 Return structs to free list and invoke wakeup function. 984 */ 985 timer = item->common->startTime; /* grab initial value of timer */ 986 RF_ETIMER_STOP(timer); 987 RF_ETIMER_EVAL(timer); 988 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 989 if (rf_parityLogDebug) 990 printf("[waking process for region %d]\n", item->regionID); 991 wakeFunc = item->common->wakeFunc; 992 wakeArg = item->common->wakeArg; 993 FreeParityLogCommonData(item->common); 994 FreeParityLogData(item); 995 (wakeFunc)(wakeArg, 0); 996 } 997 else 998 FreeParityLogData(item); 999 } 1000 } 1001 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 1002 if (rf_parityLogDebug) 1003 printf("[exiting ParityLogAppend]\n"); 1004 return(0); 1005} 1006 1007 1008void rf_EnableParityLogging(RF_Raid_t *raidPtr) 1009{ 1010 int regionID; 1011 1012 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 1013 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 1014 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 1015 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 1016 } 1017 if (rf_parityLogDebug) 1018 printf("[parity logging enabled]\n"); 1019} 1020 1021#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 1022