1/* $NetBSD: rf_paritylogDiskMgr.c,v 1.31 2021/07/23 00:54:45 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28/* Code for flushing and reintegration operations related to parity logging. 29 * 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.31 2021/07/23 00:54:45 oster Exp $"); 34 35#include "rf_archs.h" 36 37#if RF_INCLUDE_PARITYLOGGING > 0 38 39#include <dev/raidframe/raidframevar.h> 40 41#include "rf_threadstuff.h" 42#include "rf_mcpair.h" 43#include "rf_raid.h" 44#include "rf_dag.h" 45#include "rf_dagfuncs.h" 46#include "rf_desc.h" 47#include "rf_layout.h" 48#include "rf_diskqueue.h" 49#include "rf_paritylog.h" 50#include "rf_general.h" 51#include "rf_etimer.h" 52#include "rf_paritylogging.h" 53#include "rf_engine.h" 54#include "rf_dagutils.h" 55#include "rf_map.h" 56#include "rf_parityscan.h" 57 58#include "rf_paritylogDiskMgr.h" 59 60static void *AcquireReintBuffer(RF_RegionBufferQueue_t *); 61 62static void * 63AcquireReintBuffer(RF_RegionBufferQueue_t *pool) 64{ 65 void *bufPtr = NULL; 66 67 /* Return a region buffer from the free list (pool). If the free list 68 * is empty, WAIT. BLOCKING */ 69 70 rf_lock_mutex2(pool->mutex); 71 if (pool->availableBuffers > 0) { 72 bufPtr = pool->buffers[pool->availBuffersIndex]; 73 pool->availableBuffers--; 74 pool->availBuffersIndex++; 75 if (pool->availBuffersIndex == pool->totalBuffers) 76 pool->availBuffersIndex = 0; 77 rf_unlock_mutex2(pool->mutex); 78 } else { 79 RF_PANIC(); /* should never happen in correct config, 80 * single reint */ 81 rf_wait_cond2(pool->cond, pool->mutex); 82 } 83 return (bufPtr); 84} 85 86static void 87ReleaseReintBuffer( 88 RF_RegionBufferQueue_t * pool, 89 void *bufPtr) 90{ 91 /* Insert a region buffer (bufPtr) into the free list (pool). 92 * NON-BLOCKING */ 93 94 rf_lock_mutex2(pool->mutex); 95 pool->availableBuffers++; 96 pool->buffers[pool->emptyBuffersIndex] = bufPtr; 97 pool->emptyBuffersIndex++; 98 if (pool->emptyBuffersIndex == pool->totalBuffers) 99 pool->emptyBuffersIndex = 0; 100 RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); 101 /* 102 * XXXmrg this signal goes with the above "shouldn't happen" wait? 103 */ 104 rf_signal_cond2(pool->cond); 105 rf_unlock_mutex2(pool->mutex); 106} 107 108 109 110static void 111ReadRegionLog( 112 RF_RegionId_t regionID, 113 RF_MCPair_t * rrd_mcpair, 114 void *regionBuffer, 115 RF_Raid_t * raidPtr, 116 RF_DagHeader_t ** rrd_dag_h, 117 RF_AllocListElem_t ** rrd_alloclist, 118 RF_PhysDiskAddr_t ** rrd_pda) 119{ 120 /* Initiate the read a region log from disk. Once initiated, return 121 * to the calling routine. 122 * 123 * NON-BLOCKING */ 124 125 RF_AccTraceEntry_t *tracerec; 126 RF_DagNode_t *rrd_rdNode; 127 128 /* create DAG to read region log from disk */ 129 rf_MakeAllocList(*rrd_alloclist); 130 *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, 131 rf_DiskReadFunc, rf_DiskReadUndoFunc, 132 "Rrl", *rrd_alloclist, 133 RF_DAG_FLAGS_NONE, 134 RF_IO_NORMAL_PRIORITY); 135 136 /* create and initialize PDA for the core log */ 137 *rrd_pda = rf_AllocPDAList(raidPtr, 1); 138 rf_MapLogParityLogging(raidPtr, regionID, 0, 139 &((*rrd_pda)->col), &((*rrd_pda)->startSector)); 140 (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; 141 142 if ((*rrd_pda)->next) { 143 (*rrd_pda)->next = NULL; 144 printf("set rrd_pda->next to NULL\n"); 145 } 146 /* initialize DAG parameters */ 147 tracerec = RF_Malloc(sizeof(*tracerec)); 148 (*rrd_dag_h)->tracerec = tracerec; 149 rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; 150 rrd_rdNode->params[0].p = *rrd_pda; 151/* rrd_rdNode->params[1] = regionBuffer; */ 152 rrd_rdNode->params[2].v = 0; 153 rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 154 155 /* launch region log read dag */ 156 rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 157 (void *) rrd_mcpair); 158} 159 160 161 162static void 163WriteCoreLog( 164 RF_ParityLog_t * log, 165 RF_MCPair_t * fwr_mcpair, 166 RF_Raid_t * raidPtr, 167 RF_DagHeader_t ** fwr_dag_h, 168 RF_AllocListElem_t ** fwr_alloclist, 169 RF_PhysDiskAddr_t ** fwr_pda) 170{ 171 RF_RegionId_t regionID = log->regionID; 172 RF_AccTraceEntry_t *tracerec; 173 RF_SectorNum_t regionOffset; 174 RF_DagNode_t *fwr_wrNode; 175 176 /* Initiate the write of a core log to a region log disk. Once 177 * initiated, return to the calling routine. 178 * 179 * NON-BLOCKING */ 180 181 /* create DAG to write a core log to a region log disk */ 182 rf_MakeAllocList(*fwr_alloclist); 183 *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, 184 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 185 "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); 186 187 *fwr_pda = rf_AllocPDAList(raidPtr, 1); 188 regionOffset = log->diskOffset; 189 rf_MapLogParityLogging(raidPtr, regionID, regionOffset, 190 &((*fwr_pda)->col), 191 &((*fwr_pda)->startSector)); 192 (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; 193 194 /* initialize DAG parameters */ 195 tracerec = RF_Malloc(sizeof(*tracerec)); 196 (*fwr_dag_h)->tracerec = tracerec; 197 fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; 198 fwr_wrNode->params[0].p = *fwr_pda; 199/* fwr_wrNode->params[1] = log->bufPtr; */ 200 fwr_wrNode->params[2].v = 0; 201 fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 202 203 /* launch the dag to write the core log to disk */ 204 rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 205 (void *) fwr_mcpair); 206} 207 208 209static void 210ReadRegionParity( 211 RF_RegionId_t regionID, 212 RF_MCPair_t * prd_mcpair, 213 void *parityBuffer, 214 RF_Raid_t * raidPtr, 215 RF_DagHeader_t ** prd_dag_h, 216 RF_AllocListElem_t ** prd_alloclist, 217 RF_PhysDiskAddr_t ** prd_pda) 218{ 219 /* Initiate the read region parity from disk. Once initiated, return 220 * to the calling routine. 221 * 222 * NON-BLOCKING */ 223 224 RF_AccTraceEntry_t *tracerec; 225 RF_DagNode_t *prd_rdNode; 226 227 /* create DAG to read region parity from disk */ 228 rf_MakeAllocList(*prd_alloclist); 229 *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, 230 rf_DiskReadUndoFunc, "Rrp", 231 *prd_alloclist, RF_DAG_FLAGS_NONE, 232 RF_IO_NORMAL_PRIORITY); 233 234 /* create and initialize PDA for region parity */ 235 *prd_pda = rf_AllocPDAList(raidPtr, 1); 236 rf_MapRegionParity(raidPtr, regionID, 237 &((*prd_pda)->col), &((*prd_pda)->startSector), 238 &((*prd_pda)->numSector)); 239 if (rf_parityLogDebug) 240 printf("[reading %d sectors of parity from region %d]\n", 241 (int) (*prd_pda)->numSector, regionID); 242 if ((*prd_pda)->next) { 243 (*prd_pda)->next = NULL; 244 printf("set prd_pda->next to NULL\n"); 245 } 246 /* initialize DAG parameters */ 247 tracerec = RF_Malloc(sizeof(*tracerec)); 248 (*prd_dag_h)->tracerec = tracerec; 249 prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; 250 prd_rdNode->params[0].p = *prd_pda; 251 prd_rdNode->params[1].p = parityBuffer; 252 prd_rdNode->params[2].v = 0; 253 prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 254#if RF_DEBUG_VALIDATE_DAG 255 if (rf_validateDAGDebug) 256 rf_ValidateDAG(*prd_dag_h); 257#endif 258 /* launch region parity read dag */ 259 rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 260 (void *) prd_mcpair); 261} 262 263static void 264WriteRegionParity( 265 RF_RegionId_t regionID, 266 RF_MCPair_t * pwr_mcpair, 267 void *parityBuffer, 268 RF_Raid_t * raidPtr, 269 RF_DagHeader_t ** pwr_dag_h, 270 RF_AllocListElem_t ** pwr_alloclist, 271 RF_PhysDiskAddr_t ** pwr_pda) 272{ 273 /* Initiate the write of region parity to disk. Once initiated, return 274 * to the calling routine. 275 * 276 * NON-BLOCKING */ 277 278 RF_AccTraceEntry_t *tracerec; 279 RF_DagNode_t *pwr_wrNode; 280 281 /* create DAG to write region log from disk */ 282 rf_MakeAllocList(*pwr_alloclist); 283 *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, 284 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 285 "Wrp", *pwr_alloclist, 286 RF_DAG_FLAGS_NONE, 287 RF_IO_NORMAL_PRIORITY); 288 289 /* create and initialize PDA for region parity */ 290 *pwr_pda = rf_AllocPDAList(raidPtr, 1); 291 rf_MapRegionParity(raidPtr, regionID, 292 &((*pwr_pda)->col), &((*pwr_pda)->startSector), 293 &((*pwr_pda)->numSector)); 294 295 /* initialize DAG parameters */ 296 tracerec = RF_Malloc(sizeof(*tracerec)); 297 (*pwr_dag_h)->tracerec = tracerec; 298 pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; 299 pwr_wrNode->params[0].p = *pwr_pda; 300/* pwr_wrNode->params[1] = parityBuffer; */ 301 pwr_wrNode->params[2].v = 0; 302 pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 303 304 /* launch the dag to write region parity to disk */ 305 rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 306 (void *) pwr_mcpair); 307} 308 309static void 310FlushLogsToDisk( 311 RF_Raid_t * raidPtr, 312 RF_ParityLog_t * logList) 313{ 314 /* Flush a linked list of core logs to the log disk. Logs contain the 315 * disk location where they should be written. Logs were written in 316 * FIFO order and that order must be preserved. 317 * 318 * Recommended optimizations: 1) allow multiple flushes to occur 319 * simultaneously 2) coalesce contiguous flush operations 320 * 321 * BLOCKING */ 322 323 RF_ParityLog_t *log; 324 RF_RegionId_t regionID; 325 RF_MCPair_t *fwr_mcpair; 326 RF_DagHeader_t *fwr_dag_h; 327 RF_AllocListElem_t *fwr_alloclist; 328 RF_PhysDiskAddr_t *fwr_pda; 329 330 fwr_mcpair = rf_AllocMCPair(raidPtr); 331 RF_LOCK_MCPAIR(fwr_mcpair); 332 333 RF_ASSERT(logList); 334 log = logList; 335 while (log) { 336 regionID = log->regionID; 337 338 /* create and launch a DAG to write the core log */ 339 if (rf_parityLogDebug) 340 printf("[initiating write of core log for region %d]\n", regionID); 341 fwr_mcpair->flag = RF_FALSE; 342 WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, 343 &fwr_alloclist, &fwr_pda); 344 345 /* wait for the DAG to complete */ 346 while (!fwr_mcpair->flag) 347 RF_WAIT_MCPAIR(fwr_mcpair); 348 if (fwr_dag_h->status != rf_enable) { 349 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); 350 RF_ASSERT(0); 351 } 352 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ 353 rf_FreePhysDiskAddr(raidPtr, fwr_pda); 354 rf_FreeDAG(fwr_dag_h); 355 rf_FreeAllocList(fwr_alloclist); 356 357 log = log->next; 358 } 359 RF_UNLOCK_MCPAIR(fwr_mcpair); 360 rf_FreeMCPair(raidPtr, fwr_mcpair); 361 rf_ReleaseParityLogs(raidPtr, logList); 362} 363 364static void 365ReintegrateRegion( 366 RF_Raid_t * raidPtr, 367 RF_RegionId_t regionID, 368 RF_ParityLog_t * coreLog) 369{ 370 RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; 371 RF_DagHeader_t *rrd_dag_h = NULL, *prd_dag_h, *pwr_dag_h; 372 RF_AllocListElem_t *rrd_alloclist = NULL, *prd_alloclist, *pwr_alloclist; 373 RF_PhysDiskAddr_t *rrd_pda = NULL, *prd_pda, *pwr_pda; 374 void *parityBuffer, *regionBuffer = NULL; 375 376 /* Reintegrate a region (regionID). 377 * 378 * 1. acquire region and parity buffers 379 * 2. read log from disk 380 * 3. read parity from disk 381 * 4. apply log to parity 382 * 5. apply core log to parity 383 * 6. write new parity to disk 384 * 385 * BLOCKING */ 386 387 if (rf_parityLogDebug) 388 printf("[reintegrating region %d]\n", regionID); 389 390 /* initiate read of region parity */ 391 if (rf_parityLogDebug) 392 printf("[initiating read of parity for region %d]\n",regionID); 393 parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); 394 prd_mcpair = rf_AllocMCPair(raidPtr); 395 RF_LOCK_MCPAIR(prd_mcpair); 396 prd_mcpair->flag = RF_FALSE; 397 ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, 398 &prd_dag_h, &prd_alloclist, &prd_pda); 399 400 /* if region log nonempty, initiate read */ 401 if (raidPtr->regionInfo[regionID].diskCount > 0) { 402 if (rf_parityLogDebug) 403 printf("[initiating read of disk log for region %d]\n", 404 regionID); 405 regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); 406 rrd_mcpair = rf_AllocMCPair(raidPtr); 407 RF_LOCK_MCPAIR(rrd_mcpair); 408 rrd_mcpair->flag = RF_FALSE; 409 ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, 410 &rrd_dag_h, &rrd_alloclist, &rrd_pda); 411 } 412 /* wait on read of region parity to complete */ 413 while (!prd_mcpair->flag) { 414 RF_WAIT_MCPAIR(prd_mcpair); 415 } 416 RF_UNLOCK_MCPAIR(prd_mcpair); 417 if (prd_dag_h->status != rf_enable) { 418 RF_ERRORMSG("Unable to read parity from disk\n"); 419 /* add code to fail the parity disk */ 420 RF_ASSERT(0); 421 } 422 /* apply core log to parity */ 423 /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ 424 425 if (raidPtr->regionInfo[regionID].diskCount > 0) { 426 /* wait on read of region log to complete */ 427 while (!rrd_mcpair->flag) 428 RF_WAIT_MCPAIR(rrd_mcpair); 429 RF_UNLOCK_MCPAIR(rrd_mcpair); 430 if (rrd_dag_h->status != rf_enable) { 431 RF_ERRORMSG("Unable to read region log from disk\n"); 432 /* add code to fail the log disk */ 433 RF_ASSERT(0); 434 } 435 /* apply region log to parity */ 436 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ 437 /* release resources associated with region log */ 438 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ 439 rf_FreePhysDiskAddr(raidPtr, rrd_pda); 440 rf_FreeDAG(rrd_dag_h); 441 rf_FreeAllocList(rrd_alloclist); 442 rf_FreeMCPair(raidPtr, rrd_mcpair); 443 ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); 444 } 445 /* write reintegrated parity to disk */ 446 if (rf_parityLogDebug) 447 printf("[initiating write of parity for region %d]\n", 448 regionID); 449 pwr_mcpair = rf_AllocMCPair(raidPtr); 450 RF_LOCK_MCPAIR(pwr_mcpair); 451 pwr_mcpair->flag = RF_FALSE; 452 WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, 453 &pwr_dag_h, &pwr_alloclist, &pwr_pda); 454 while (!pwr_mcpair->flag) 455 RF_WAIT_MCPAIR(pwr_mcpair); 456 RF_UNLOCK_MCPAIR(pwr_mcpair); 457 if (pwr_dag_h->status != rf_enable) { 458 RF_ERRORMSG("Unable to write parity to disk\n"); 459 /* add code to fail the parity disk */ 460 RF_ASSERT(0); 461 } 462 /* release resources associated with read of old parity */ 463 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ 464 rf_FreePhysDiskAddr(raidPtr, prd_pda); 465 rf_FreeDAG(prd_dag_h); 466 rf_FreeAllocList(prd_alloclist); 467 rf_FreeMCPair(raidPtr, prd_mcpair); 468 469 /* release resources associated with write of new parity */ 470 ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); 471 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ 472 rf_FreePhysDiskAddr(raidPtr, pwr_pda); 473 rf_FreeDAG(pwr_dag_h); 474 rf_FreeAllocList(pwr_alloclist); 475 rf_FreeMCPair(raidPtr, pwr_mcpair); 476 477 if (rf_parityLogDebug) 478 printf("[finished reintegrating region %d]\n", regionID); 479} 480 481 482 483static void 484ReintegrateLogs( 485 RF_Raid_t * raidPtr, 486 RF_ParityLog_t * logList) 487{ 488 RF_ParityLog_t *log, *freeLogList = NULL; 489 RF_ParityLogData_t *logData, *logDataList; 490 RF_RegionId_t regionID; 491 492 RF_ASSERT(logList); 493 while (logList) { 494 log = logList; 495 logList = logList->next; 496 log->next = NULL; 497 regionID = log->regionID; 498 ReintegrateRegion(raidPtr, regionID, log); 499 log->numRecords = 0; 500 501 /* remove all items which are blocked on reintegration of this 502 * region */ 503 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 504 logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, 505 &raidPtr->parityLogDiskQueue.reintBlockHead, 506 &raidPtr->parityLogDiskQueue.reintBlockTail, 507 RF_TRUE); 508 logDataList = logData; 509 while (logData) { 510 logData->next = rf_SearchAndDequeueParityLogData( 511 raidPtr, regionID, 512 &raidPtr->parityLogDiskQueue.reintBlockHead, 513 &raidPtr->parityLogDiskQueue.reintBlockTail, 514 RF_TRUE); 515 logData = logData->next; 516 } 517 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 518 519 /* process blocked log data and clear reintInProgress flag for 520 * this region */ 521 if (logDataList) 522 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); 523 else { 524 /* Enable flushing for this region. Holding both 525 * locks provides a synchronization barrier with 526 * DumpParityLogToDisk */ 527 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 528 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 529 /* XXXmrg: don't need this? */ 530 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 531 raidPtr->regionInfo[regionID].diskCount = 0; 532 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 533 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 534 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 535 * enabled */ 536 /* XXXmrg: don't need this? */ 537 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 538 } 539 /* if log wasn't used, attach it to the list of logs to be 540 * returned */ 541 if (log) { 542 log->next = freeLogList; 543 freeLogList = log; 544 } 545 } 546 if (freeLogList) 547 rf_ReleaseParityLogs(raidPtr, freeLogList); 548} 549 550int 551rf_ShutdownLogging(RF_Raid_t * raidPtr) 552{ 553 /* shutdown parity logging 1) disable parity logging in all regions 2) 554 * reintegrate all regions */ 555 556 RF_SectorCount_t diskCount; 557 RF_RegionId_t regionID; 558 RF_ParityLog_t *log; 559 560 if (rf_parityLogDebug) 561 printf("[shutting down parity logging]\n"); 562 /* Since parity log maps are volatile, we must reintegrate all 563 * regions. */ 564 if (rf_forceParityLogReint) { 565 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 566 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 567 raidPtr->regionInfo[regionID].loggingEnabled = 568 RF_FALSE; 569 log = raidPtr->regionInfo[regionID].coreLog; 570 raidPtr->regionInfo[regionID].coreLog = NULL; 571 diskCount = raidPtr->regionInfo[regionID].diskCount; 572 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 573 if (diskCount > 0 || log != NULL) 574 ReintegrateRegion(raidPtr, regionID, log); 575 if (log != NULL) 576 rf_ReleaseParityLogs(raidPtr, log); 577 } 578 } 579 if (rf_parityLogDebug) { 580 printf("[parity logging disabled]\n"); 581 printf("[should be done!]\n"); 582 } 583 return (0); 584} 585 586void 587rf_ParityLoggingDiskManager(void *v) 588{ 589 RF_Raid_t *raidPtr = v; 590 RF_ParityLog_t *reintQueue, *flushQueue; 591 int workNeeded, done = RF_FALSE; 592 int s; 593 594 /* Main program for parity logging disk thread. This routine waits 595 * for work to appear in either the flush or reintegration queues and 596 * is responsible for flushing core logs to the log disk as well as 597 * reintegrating parity regions. 598 * 599 * BLOCKING */ 600 601 s = splbio(); 602 603 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 604 605 /* 606 * Inform our creator that we're running. Don't bother doing the 607 * mutex lock/unlock dance- we locked above, and we'll unlock 608 * below with nothing to do, yet. 609 */ 610 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; 611 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 612 613 /* empty the work queues */ 614 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 615 raidPtr->parityLogDiskQueue.flushQueue = NULL; 616 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 617 raidPtr->parityLogDiskQueue.reintQueue = NULL; 618 workNeeded = (flushQueue || reintQueue); 619 620 while (!done) { 621 while (workNeeded) { 622 /* First, flush all logs in the flush queue, freeing 623 * buffers Second, reintegrate all regions which are 624 * reported as full. Third, append queued log data 625 * until blocked. 626 * 627 * Note: Incoming appends (ParityLogAppend) can block on 628 * either 1. empty buffer pool 2. region under 629 * reintegration To preserve a global FIFO ordering of 630 * appends, buffers are not released to the world 631 * until those appends blocked on buffers are removed 632 * from the append queue. Similarly, regions which 633 * are reintegrated are not opened for general use 634 * until the append queue has been emptied. */ 635 636 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 637 638 /* empty flushQueue, using free'd log buffers to 639 * process bufTail */ 640 if (flushQueue) 641 FlushLogsToDisk(raidPtr, flushQueue); 642 643 /* empty reintQueue, flushing from reintTail as we go */ 644 if (reintQueue) 645 ReintegrateLogs(raidPtr, reintQueue); 646 647 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 648 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 649 raidPtr->parityLogDiskQueue.flushQueue = NULL; 650 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 651 raidPtr->parityLogDiskQueue.reintQueue = NULL; 652 workNeeded = (flushQueue || reintQueue); 653 } 654 /* no work is needed at this point */ 655 if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { 656 /* shutdown parity logging 1. disable parity logging 657 * in all regions 2. reintegrate all regions */ 658 done = RF_TRUE; /* thread disabled, no work needed */ 659 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 660 rf_ShutdownLogging(raidPtr); 661 } 662 if (!done) { 663 /* thread enabled, no work needed, so sleep */ 664 if (rf_parityLogDebug) 665 printf("[parity logging disk manager sleeping]\n"); 666 rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 667 raidPtr->parityLogDiskQueue.mutex); 668 if (rf_parityLogDebug) 669 printf("[parity logging disk manager just woke up]\n"); 670 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 671 raidPtr->parityLogDiskQueue.flushQueue = NULL; 672 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 673 raidPtr->parityLogDiskQueue.reintQueue = NULL; 674 workNeeded = (flushQueue || reintQueue); 675 } 676 } 677 /* 678 * Announce that we're done. 679 */ 680 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 681 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; 682 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 683 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 684 685 splx(s); 686 687 /* 688 * In the NetBSD kernel, the thread must exit; returning would 689 * cause the proc trampoline to attempt to return to userspace. 690 */ 691 kthread_exit(0); /* does not return */ 692} 693#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 694