1/* $NetBSD: rf_paritylogging.c,v 1.35 2019/02/09 03:34:00 christos Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 30/* 31 parity logging configuration, dag selection, and mapping is implemented here 32 */ 33 34#include <sys/cdefs.h> 35__KERNEL_RCSID(0, "$NetBSD: rf_paritylogging.c,v 1.35 2019/02/09 03:34:00 christos Exp $"); 36 37#include "rf_archs.h" 38 39#if RF_INCLUDE_PARITYLOGGING > 0 40 41#include <dev/raidframe/raidframevar.h> 42 43#include "rf_raid.h" 44#include "rf_dag.h" 45#include "rf_dagutils.h" 46#include "rf_dagfuncs.h" 47#include "rf_dagffrd.h" 48#include "rf_dagffwr.h" 49#include "rf_dagdegrd.h" 50#include "rf_dagdegwr.h" 51#include "rf_paritylog.h" 52#include "rf_paritylogDiskMgr.h" 53#include "rf_paritylogging.h" 54#include "rf_parityloggingdags.h" 55#include "rf_general.h" 56#include "rf_map.h" 57#include "rf_utils.h" 58#include "rf_shutdown.h" 59 60typedef struct RF_ParityLoggingConfigInfo_s { 61 RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by 62 * IdentifyStripe */ 63} RF_ParityLoggingConfigInfo_t; 64 65static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); 66static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); 67static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); 68static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); 69static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); 70static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); 71static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); 72 73int 74rf_ConfigureParityLogging( 75 RF_ShutdownList_t ** listp, 76 RF_Raid_t * raidPtr, 77 RF_Config_t * cfgPtr) 78{ 79 int i, j, startdisk, rc; 80 RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; 81 RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; 82 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 83 RF_ParityLoggingConfigInfo_t *info; 84 RF_ParityLog_t *l = NULL, *next; 85 void *lHeapPtr; 86 87 if (rf_numParityRegions <= 0) 88 return(EINVAL); 89 90 /* 91 * We create multiple entries on the shutdown list here, since 92 * this configuration routine is fairly complicated in and of 93 * itself, and this makes backing out of a failed configuration 94 * much simpler. 95 */ 96 97 raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; 98 99 /* create a parity logging configuration structure */ 100 info = RF_MallocAndAdd(sizeof(*info), raidPtr->cleanupList); 101 if (info == NULL) 102 return (ENOMEM); 103 layoutPtr->layoutSpecificInfo = (void *) info; 104 105 /* the stripe identifier must identify the disks in each stripe, IN 106 * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ 107 info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), 108 (raidPtr->numCol), 109 raidPtr->cleanupList); 110 if (info->stripeIdentifier == NULL) 111 return (ENOMEM); 112 113 startdisk = 0; 114 for (i = 0; i < (raidPtr->numCol); i++) { 115 for (j = 0; j < (raidPtr->numCol); j++) { 116 info->stripeIdentifier[i][j] = (startdisk + j) % 117 (raidPtr->numCol - 1); 118 } 119 if ((--startdisk) < 0) 120 startdisk = raidPtr->numCol - 1 - 1; 121 } 122 123 /* fill in the remaining layout parameters */ 124 layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; 125 layoutPtr->numParityCol = 1; 126 layoutPtr->numParityLogCol = 1; 127 layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - 128 layoutPtr->numParityLogCol; 129 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * 130 layoutPtr->sectorsPerStripeUnit; 131 layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; 132 raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * 133 layoutPtr->sectorsPerStripeUnit; 134 135 raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * 136 layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 137 138 /* configure parity log parameters 139 * 140 * parameter comment/constraints 141 * ------------------------------------------- 142 * numParityRegions* all regions (except possibly last) 143 * of equal size 144 * totalInCoreLogCapacity* amount of memory in bytes available 145 * for in-core logs (default 1 MB) 146 * numSectorsPerLog# capacity of an in-core log in sectors 147 * (1 * disk track) 148 * numParityLogs total number of in-core logs, 149 * should be at least numParityRegions 150 * regionLogCapacity size of a region log (except possibly 151 * last one) in sectors 152 * totalLogCapacity total amount of log space in sectors 153 * 154 * where '*' denotes a user settable parameter. 155 * Note that logs are fixed to be the size of a disk track, 156 * value #defined in rf_paritylog.h 157 * 158 */ 159 160 totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; 161 raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; 162 if (rf_parityLogDebug) 163 printf("bytes per sector %d\n", raidPtr->bytesPerSector); 164 165 /* reduce fragmentation within a disk region by adjusting the number 166 * of regions in an attempt to allow an integral number of logs to fit 167 * into a disk region */ 168 fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; 169 if (fragmentation > 0) 170 for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { 171 if (((totalLogCapacity / (rf_numParityRegions + i)) % 172 raidPtr->numSectorsPerLog) < fragmentation) { 173 rf_numParityRegions++; 174 raidPtr->regionLogCapacity = totalLogCapacity / 175 rf_numParityRegions; 176 fragmentation = raidPtr->regionLogCapacity % 177 raidPtr->numSectorsPerLog; 178 } 179 if (((totalLogCapacity / (rf_numParityRegions - i)) % 180 raidPtr->numSectorsPerLog) < fragmentation) { 181 rf_numParityRegions--; 182 raidPtr->regionLogCapacity = totalLogCapacity / 183 rf_numParityRegions; 184 fragmentation = raidPtr->regionLogCapacity % 185 raidPtr->numSectorsPerLog; 186 } 187 } 188 /* ensure integral number of regions per log */ 189 raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / 190 raidPtr->numSectorsPerLog) * 191 raidPtr->numSectorsPerLog; 192 193 raidPtr->numParityLogs = rf_totalInCoreLogCapacity / 194 (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); 195 /* to avoid deadlock, must ensure that enough logs exist for each 196 * region to have one simultaneously */ 197 if (raidPtr->numParityLogs < rf_numParityRegions) 198 raidPtr->numParityLogs = rf_numParityRegions; 199 200 /* create region information structs */ 201 printf("Allocating %d bytes for in-core parity region info\n", 202 (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); 203 raidPtr->regionInfo = RF_Malloc( 204 rf_numParityRegions * sizeof(*raidPtr->regionInfo)); 205 if (raidPtr->regionInfo == NULL) 206 return (ENOMEM); 207 208 /* last region may not be full capacity */ 209 lastRegionCapacity = raidPtr->regionLogCapacity; 210 while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + 211 lastRegionCapacity > totalLogCapacity) 212 lastRegionCapacity = lastRegionCapacity - 213 raidPtr->numSectorsPerLog; 214 215 raidPtr->regionParityRange = raidPtr->sectorsPerDisk / 216 rf_numParityRegions; 217 maxRegionParityRange = raidPtr->regionParityRange; 218 219/* i can't remember why this line is in the code -wvcii 6/30/95 */ 220/* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) 221 regionParityRange++; */ 222 223 /* build pool of unused parity logs */ 224 printf("Allocating %d bytes for %d parity logs\n", 225 raidPtr->numParityLogs * raidPtr->numSectorsPerLog * 226 raidPtr->bytesPerSector, 227 raidPtr->numParityLogs); 228 raidPtr->parityLogBufferHeap = RF_Malloc(raidPtr->numParityLogs 229 * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); 230 if (raidPtr->parityLogBufferHeap == NULL) 231 return (ENOMEM); 232 lHeapPtr = raidPtr->parityLogBufferHeap; 233 rf_init_mutex2(raidPtr->parityLogPool.mutex, IPL_VM); 234 for (i = 0; i < raidPtr->numParityLogs; i++) { 235 if (i == 0) { 236 raidPtr->parityLogPool.parityLogs = 237 RF_Malloc( 238 sizeof(*raidPtr->parityLogPool.parityLogs)); 239 if (raidPtr->parityLogPool.parityLogs == NULL) { 240 RF_Free(raidPtr->parityLogBufferHeap, 241 raidPtr->numParityLogs * 242 raidPtr->numSectorsPerLog * 243 raidPtr->bytesPerSector); 244 return (ENOMEM); 245 } 246 l = raidPtr->parityLogPool.parityLogs; 247 } else { 248 l->next = RF_Malloc(sizeof(*l->next)); 249 if (l->next == NULL) { 250 RF_Free(raidPtr->parityLogBufferHeap, 251 raidPtr->numParityLogs * 252 raidPtr->numSectorsPerLog * 253 raidPtr->bytesPerSector); 254 for (l = raidPtr->parityLogPool.parityLogs; 255 l; 256 l = next) { 257 next = l->next; 258 if (l->records) 259 RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); 260 RF_Free(l, sizeof(RF_ParityLog_t)); 261 } 262 return (ENOMEM); 263 } 264 l = l->next; 265 } 266 l->bufPtr = lHeapPtr; 267 lHeapPtr = (char *)lHeapPtr + raidPtr->numSectorsPerLog * 268 raidPtr->bytesPerSector; 269 l->records = RF_Malloc(raidPtr->numSectorsPerLog * 270 sizeof(*l->records)); 271 if (l->records == NULL) { 272 RF_Free(raidPtr->parityLogBufferHeap, 273 raidPtr->numParityLogs * 274 raidPtr->numSectorsPerLog * 275 raidPtr->bytesPerSector); 276 for (l = raidPtr->parityLogPool.parityLogs; 277 l; 278 l = next) { 279 next = l->next; 280 if (l->records) 281 RF_Free(l->records, 282 (raidPtr->numSectorsPerLog * 283 sizeof(RF_ParityLogRecord_t))); 284 RF_Free(l, sizeof(RF_ParityLog_t)); 285 } 286 return (ENOMEM); 287 } 288 } 289 rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); 290 /* build pool of region buffers */ 291 rf_init_mutex2(raidPtr->regionBufferPool.mutex, IPL_VM); 292 rf_init_cond2(raidPtr->regionBufferPool.cond, "rfrbpl"); 293 raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * 294 raidPtr->bytesPerSector; 295 printf("regionBufferPool.bufferSize %d\n", 296 raidPtr->regionBufferPool.bufferSize); 297 298 /* for now, only one region at a time may be reintegrated */ 299 raidPtr->regionBufferPool.totalBuffers = 1; 300 301 raidPtr->regionBufferPool.availableBuffers = 302 raidPtr->regionBufferPool.totalBuffers; 303 raidPtr->regionBufferPool.availBuffersIndex = 0; 304 raidPtr->regionBufferPool.emptyBuffersIndex = 0; 305 printf("Allocating %d bytes for regionBufferPool\n", 306 (int) (raidPtr->regionBufferPool.totalBuffers * 307 sizeof(void *))); 308 raidPtr->regionBufferPool.buffers = RF_Malloc( 309 raidPtr->regionBufferPool.totalBuffers * 310 sizeof(*raidPtr->regionBufferPool.buffers)); 311 if (raidPtr->regionBufferPool.buffers == NULL) { 312 return (ENOMEM); 313 } 314 for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { 315 printf("Allocating %d bytes for regionBufferPool#%d\n", 316 (int) (raidPtr->regionBufferPool.bufferSize * 317 sizeof(char)), i); 318 raidPtr->regionBufferPool.buffers[i] = 319 RF_Malloc(raidPtr->regionBufferPool.bufferSize); 320 if (raidPtr->regionBufferPool.buffers[i] == NULL) { 321 for (j = 0; j < i; j++) { 322 RF_Free(raidPtr->regionBufferPool.buffers[i], 323 raidPtr->regionBufferPool.bufferSize * 324 sizeof(char)); 325 } 326 RF_Free(raidPtr->regionBufferPool.buffers, 327 raidPtr->regionBufferPool.totalBuffers * 328 sizeof(void *)); 329 return (ENOMEM); 330 } 331 printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, 332 (long) raidPtr->regionBufferPool.buffers[i]); 333 } 334 rf_ShutdownCreate(listp, 335 rf_ShutdownParityLoggingRegionBufferPool, 336 raidPtr); 337 /* build pool of parity buffers */ 338 parityBufferCapacity = maxRegionParityRange; 339 rf_init_mutex2(raidPtr->parityBufferPool.mutex, IPL_VM); 340 rf_init_cond2(raidPtr->parityBufferPool.cond, "rfpbpl"); 341 raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * 342 raidPtr->bytesPerSector; 343 printf("parityBufferPool.bufferSize %d\n", 344 raidPtr->parityBufferPool.bufferSize); 345 346 /* for now, only one region at a time may be reintegrated */ 347 raidPtr->parityBufferPool.totalBuffers = 1; 348 349 raidPtr->parityBufferPool.availableBuffers = 350 raidPtr->parityBufferPool.totalBuffers; 351 raidPtr->parityBufferPool.availBuffersIndex = 0; 352 raidPtr->parityBufferPool.emptyBuffersIndex = 0; 353 printf("Allocating %d bytes for parityBufferPool of %d units\n", 354 (int) (raidPtr->parityBufferPool.totalBuffers * 355 sizeof(void *)), 356 raidPtr->parityBufferPool.totalBuffers ); 357 raidPtr->parityBufferPool.buffers = RF_Malloc( 358 raidPtr->parityBufferPool.totalBuffers * 359 sizeof(*raidPtr->parityBufferPool.buffers)); 360 if (raidPtr->parityBufferPool.buffers == NULL) { 361 return (ENOMEM); 362 } 363 for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { 364 printf("Allocating %d bytes for parityBufferPool#%d\n", 365 (int) (raidPtr->parityBufferPool.bufferSize * 366 sizeof(char)),i); 367 raidPtr->parityBufferPool.buffers[i] = RF_Malloc( 368 raidPtr->parityBufferPool.bufferSize); 369 if (raidPtr->parityBufferPool.buffers == NULL) { 370 for (j = 0; j < i; j++) { 371 RF_Free(raidPtr->parityBufferPool.buffers[i], 372 raidPtr->regionBufferPool.bufferSize * 373 sizeof(char)); 374 } 375 RF_Free(raidPtr->parityBufferPool.buffers, 376 raidPtr->regionBufferPool.totalBuffers * 377 sizeof(void *)); 378 return (ENOMEM); 379 } 380 printf("parityBufferPool.buffers[%d] = %lx\n", i, 381 (long) raidPtr->parityBufferPool.buffers[i]); 382 } 383 rf_ShutdownCreate(listp, 384 rf_ShutdownParityLoggingParityBufferPool, 385 raidPtr); 386 /* initialize parityLogDiskQueue */ 387 rf_init_mutex2(raidPtr->parityLogDiskQueue.mutex, IPL_VM); 388 rf_init_cond2(raidPtr->parityLogDiskQueue.cond, "rfpldq"); 389 raidPtr->parityLogDiskQueue.flushQueue = NULL; 390 raidPtr->parityLogDiskQueue.reintQueue = NULL; 391 raidPtr->parityLogDiskQueue.bufHead = NULL; 392 raidPtr->parityLogDiskQueue.bufTail = NULL; 393 raidPtr->parityLogDiskQueue.reintHead = NULL; 394 raidPtr->parityLogDiskQueue.reintTail = NULL; 395 raidPtr->parityLogDiskQueue.logBlockHead = NULL; 396 raidPtr->parityLogDiskQueue.logBlockTail = NULL; 397 raidPtr->parityLogDiskQueue.reintBlockHead = NULL; 398 raidPtr->parityLogDiskQueue.reintBlockTail = NULL; 399 raidPtr->parityLogDiskQueue.freeDataList = NULL; 400 raidPtr->parityLogDiskQueue.freeCommonList = NULL; 401 402 rf_ShutdownCreate(listp, 403 rf_ShutdownParityLoggingDiskQueue, 404 raidPtr); 405 for (i = 0; i < rf_numParityRegions; i++) { 406 rf_init_mutex2(raidPtr->regionInfo[i].mutex, IPL_VM); 407 rf_init_mutex2(raidPtr->regionInfo[i].reintMutex, IPL_VM); 408 raidPtr->regionInfo[i].reintInProgress = RF_FALSE; 409 raidPtr->regionInfo[i].regionStartAddr = 410 raidPtr->regionLogCapacity * i; 411 raidPtr->regionInfo[i].parityStartAddr = 412 raidPtr->regionParityRange * i; 413 if (i < rf_numParityRegions - 1) { 414 raidPtr->regionInfo[i].capacity = 415 raidPtr->regionLogCapacity; 416 raidPtr->regionInfo[i].numSectorsParity = 417 raidPtr->regionParityRange; 418 } else { 419 raidPtr->regionInfo[i].capacity = 420 lastRegionCapacity; 421 raidPtr->regionInfo[i].numSectorsParity = 422 raidPtr->sectorsPerDisk - 423 raidPtr->regionParityRange * i; 424 if (raidPtr->regionInfo[i].numSectorsParity > 425 maxRegionParityRange) 426 maxRegionParityRange = 427 raidPtr->regionInfo[i].numSectorsParity; 428 } 429 raidPtr->regionInfo[i].diskCount = 0; 430 RF_ASSERT(raidPtr->regionInfo[i].capacity + 431 raidPtr->regionInfo[i].regionStartAddr <= 432 totalLogCapacity); 433 RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + 434 raidPtr->regionInfo[i].numSectorsParity <= 435 raidPtr->sectorsPerDisk); 436 printf("Allocating %d bytes for region %d\n", 437 (int) (raidPtr->regionInfo[i].capacity * 438 sizeof(RF_DiskMap_t)), i); 439 raidPtr->regionInfo[i].diskMap = RF_Malloc( 440 raidPtr->regionInfo[i].capacity * 441 sizeof(*raidPtr->regionInfo[i].diskMap)); 442 if (raidPtr->regionInfo[i].diskMap == NULL) { 443 for (j = 0; j < i; j++) 444 FreeRegionInfo(raidPtr, j); 445 RF_Free(raidPtr->regionInfo, 446 (rf_numParityRegions * 447 sizeof(RF_RegionInfo_t))); 448 return (ENOMEM); 449 } 450 raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; 451 raidPtr->regionInfo[i].coreLog = NULL; 452 } 453 rf_ShutdownCreate(listp, 454 rf_ShutdownParityLoggingRegionInfo, 455 raidPtr); 456 RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); 457 raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; 458 rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, 459 rf_ParityLoggingDiskManager, raidPtr,"rf_log"); 460 if (rc) { 461 raidPtr->parityLogDiskQueue.threadState = 0; 462 RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", 463 __FILE__, __LINE__, rc); 464 return (ENOMEM); 465 } 466 /* wait for thread to start */ 467 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 468 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { 469 rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 470 raidPtr->parityLogDiskQueue.mutex); 471 } 472 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 473 474 rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); 475 if (rf_parityLogDebug) { 476 printf(" size of disk log in sectors: %d\n", 477 (int) totalLogCapacity); 478 printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); 479 printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); 480 printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); 481 printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); 482 printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); 483 printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); 484 } 485 rf_EnableParityLogging(raidPtr); 486 487 return (0); 488} 489 490static void 491FreeRegionInfo( 492 RF_Raid_t * raidPtr, 493 RF_RegionId_t regionID) 494{ 495 RF_Free(raidPtr->regionInfo[regionID].diskMap, 496 (raidPtr->regionInfo[regionID].capacity * 497 sizeof(RF_DiskMap_t))); 498 if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { 499 rf_ReleaseParityLogs(raidPtr, 500 raidPtr->regionInfo[regionID].coreLog); 501 raidPtr->regionInfo[regionID].coreLog = NULL; 502 } else { 503 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); 504 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); 505 } 506 rf_destroy_mutex2(raidPtr->regionInfo[regionID].reintMutex); 507 rf_destroy_mutex2(raidPtr->regionInfo[regionID].mutex); 508} 509 510 511static void 512FreeParityLogQueue(RF_Raid_t * raidPtr) 513{ 514 RF_ParityLog_t *l1, *l2; 515 516 l1 = raidPtr->parityLogPool.parityLogs; 517 while (l1) { 518 l2 = l1; 519 l1 = l2->next; 520 RF_Free(l2->records, (raidPtr->numSectorsPerLog * 521 sizeof(RF_ParityLogRecord_t))); 522 RF_Free(l2, sizeof(RF_ParityLog_t)); 523 } 524 rf_destroy_mutex2(raidPtr->parityLogPool.mutex); 525} 526 527 528static void 529FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) 530{ 531 int i; 532 533 if (queue->availableBuffers != queue->totalBuffers) { 534 printf("Attempt to free region queue which is still in use!\n"); 535 RF_ASSERT(0); 536 } 537 for (i = 0; i < queue->totalBuffers; i++) 538 RF_Free(queue->buffers[i], queue->bufferSize); 539 RF_Free(queue->buffers, queue->totalBuffers * sizeof(void *)); 540 rf_destroy_mutex2(queue->mutex); 541 rf_destroy_cond2(queue->cond); 542} 543 544static void 545rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) 546{ 547 RF_Raid_t *raidPtr; 548 RF_RegionId_t i; 549 550 raidPtr = (RF_Raid_t *) arg; 551 if (rf_parityLogDebug) { 552 printf("raid%d: ShutdownParityLoggingRegionInfo\n", 553 raidPtr->raidid); 554 } 555 /* free region information structs */ 556 for (i = 0; i < rf_numParityRegions; i++) 557 FreeRegionInfo(raidPtr, i); 558 RF_Free(raidPtr->regionInfo, (rf_numParityRegions * 559 sizeof(raidPtr->regionInfo))); 560 raidPtr->regionInfo = NULL; 561} 562 563static void 564rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) 565{ 566 RF_Raid_t *raidPtr; 567 568 raidPtr = (RF_Raid_t *) arg; 569 if (rf_parityLogDebug) { 570 printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid); 571 } 572 /* free contents of parityLogPool */ 573 FreeParityLogQueue(raidPtr); 574 RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * 575 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); 576} 577 578static void 579rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) 580{ 581 RF_Raid_t *raidPtr; 582 583 raidPtr = (RF_Raid_t *) arg; 584 if (rf_parityLogDebug) { 585 printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", 586 raidPtr->raidid); 587 } 588 FreeRegionBufferQueue(&raidPtr->regionBufferPool); 589} 590 591static void 592rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) 593{ 594 RF_Raid_t *raidPtr; 595 596 raidPtr = (RF_Raid_t *) arg; 597 if (rf_parityLogDebug) { 598 printf("raid%d: ShutdownParityLoggingParityBufferPool\n", 599 raidPtr->raidid); 600 } 601 FreeRegionBufferQueue(&raidPtr->parityBufferPool); 602} 603 604static void 605rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) 606{ 607 RF_ParityLogData_t *d; 608 RF_CommonLogData_t *c; 609 RF_Raid_t *raidPtr; 610 611 raidPtr = (RF_Raid_t *) arg; 612 if (rf_parityLogDebug) { 613 printf("raid%d: ShutdownParityLoggingDiskQueue\n", 614 raidPtr->raidid); 615 } 616 /* free disk manager stuff */ 617 RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); 618 RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); 619 RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); 620 RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); 621 while (raidPtr->parityLogDiskQueue.freeDataList) { 622 d = raidPtr->parityLogDiskQueue.freeDataList; 623 raidPtr->parityLogDiskQueue.freeDataList = 624 raidPtr->parityLogDiskQueue.freeDataList->next; 625 RF_Free(d, sizeof(RF_ParityLogData_t)); 626 } 627 while (raidPtr->parityLogDiskQueue.freeCommonList) { 628 c = raidPtr->parityLogDiskQueue.freeCommonList; 629 raidPtr->parityLogDiskQueue.freeCommonList = c->next; 630 /* init is in rf_paritylog.c */ 631 rf_destroy_mutex2(c->mutex); 632 RF_Free(c, sizeof(RF_CommonLogData_t)); 633 } 634 635 rf_destroy_mutex2(raidPtr->parityLogDiskQueue.mutex); 636 rf_destroy_cond2(raidPtr->parityLogDiskQueue.cond); 637} 638 639static void 640rf_ShutdownParityLogging(RF_ThreadArg_t arg) 641{ 642 RF_Raid_t *raidPtr; 643 644 raidPtr = (RF_Raid_t *) arg; 645 if (rf_parityLogDebug) { 646 printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid); 647 } 648 /* shutdown disk thread */ 649 /* This has the desirable side-effect of forcing all regions to be 650 * reintegrated. This is necessary since all parity log maps are 651 * currently held in volatile memory. */ 652 653 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 654 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; 655 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 656 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 657 /* 658 * pLogDiskThread will now terminate when queues are cleared 659 * now wait for it to be done 660 */ 661 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 662 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { 663 rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 664 raidPtr->parityLogDiskQueue.mutex); 665 } 666 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 667 if (rf_parityLogDebug) { 668 printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid); 669 } 670} 671 672int 673rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) 674{ 675 return (20); 676} 677 678RF_HeadSepLimit_t 679rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) 680{ 681 return (10); 682} 683/* return the region ID for a given RAID address */ 684RF_RegionId_t 685rf_MapRegionIDParityLogging( 686 RF_Raid_t * raidPtr, 687 RF_SectorNum_t address) 688{ 689 RF_RegionId_t regionID; 690 691/* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ 692 regionID = address / raidPtr->regionParityRange; 693 if (regionID == rf_numParityRegions) { 694 /* last region may be larger than other regions */ 695 regionID--; 696 } 697 RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); 698 RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + 699 raidPtr->regionInfo[regionID].numSectorsParity); 700 RF_ASSERT(regionID < rf_numParityRegions); 701 return (regionID); 702} 703 704 705/* given a logical RAID sector, determine physical disk address of data */ 706void 707rf_MapSectorParityLogging( 708 RF_Raid_t * raidPtr, 709 RF_RaidAddr_t raidSector, 710 RF_RowCol_t * col, 711 RF_SectorNum_t * diskSector, 712 int remap) 713{ 714 RF_StripeNum_t SUID = raidSector / 715 raidPtr->Layout.sectorsPerStripeUnit; 716 /* *col = (SUID % (raidPtr->numCol - 717 * raidPtr->Layout.numParityLogCol)); */ 718 *col = SUID % raidPtr->Layout.numDataCol; 719 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * 720 raidPtr->Layout.sectorsPerStripeUnit + 721 (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 722} 723 724 725/* given a logical RAID sector, determine physical disk address of parity */ 726void 727rf_MapParityParityLogging( 728 RF_Raid_t * raidPtr, 729 RF_RaidAddr_t raidSector, 730 RF_RowCol_t * col, 731 RF_SectorNum_t * diskSector, 732 int remap) 733{ 734 RF_StripeNum_t SUID = raidSector / 735 raidPtr->Layout.sectorsPerStripeUnit; 736 737 /* *col = 738 * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt 739 * r->numCol - raidPtr->Layout.numParityLogCol); */ 740 *col = raidPtr->Layout.numDataCol; 741 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * 742 raidPtr->Layout.sectorsPerStripeUnit + 743 (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 744} 745 746 747/* given a regionID and sector offset, determine the physical disk address of the parity log */ 748void 749rf_MapLogParityLogging( 750 RF_Raid_t * raidPtr, 751 RF_RegionId_t regionID, 752 RF_SectorNum_t regionOffset, 753 RF_RowCol_t * col, 754 RF_SectorNum_t * startSector) 755{ 756 *col = raidPtr->numCol - 1; 757 *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; 758} 759 760 761/* given a regionID, determine the physical disk address of the logged 762 parity for that region */ 763void 764rf_MapRegionParity( 765 RF_Raid_t * raidPtr, 766 RF_RegionId_t regionID, 767 RF_RowCol_t * col, 768 RF_SectorNum_t * startSector, 769 RF_SectorCount_t * numSector) 770{ 771 *col = raidPtr->numCol - 2; 772 *startSector = raidPtr->regionInfo[regionID].parityStartAddr; 773 *numSector = raidPtr->regionInfo[regionID].numSectorsParity; 774} 775 776 777/* given a logical RAID address, determine the participating disks in 778 the stripe */ 779void 780rf_IdentifyStripeParityLogging( 781 RF_Raid_t * raidPtr, 782 RF_RaidAddr_t addr, 783 RF_RowCol_t ** diskids) 784{ 785 RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, 786 addr); 787 RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) 788 raidPtr->Layout.layoutSpecificInfo; 789 *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; 790} 791 792 793void 794rf_MapSIDToPSIDParityLogging( 795 RF_RaidLayout_t * layoutPtr, 796 RF_StripeNum_t stripeID, 797 RF_StripeNum_t * psID, 798 RF_ReconUnitNum_t * which_ru) 799{ 800 *which_ru = 0; 801 *psID = stripeID; 802} 803 804 805/* select an algorithm for performing an access. Returns two pointers, 806 * one to a function that will return information about the DAG, and 807 * another to a function that will create the dag. 808 */ 809void 810rf_ParityLoggingDagSelect( 811 RF_Raid_t * raidPtr, 812 RF_IoType_t type, 813 RF_AccessStripeMap_t * asmp, 814 RF_VoidFuncPtr * createFunc) 815{ 816 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 817 RF_PhysDiskAddr_t *failedPDA = NULL; 818 RF_RowCol_t fcol; 819 RF_RowStatus_t rstat; 820 int prior_recon; 821 822 RF_ASSERT(RF_IO_IS_R_OR_W(type)); 823 824 if (asmp->numDataFailed + asmp->numParityFailed > 1) { 825 RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 826 *createFunc = NULL; 827 return; 828 } else 829 if (asmp->numDataFailed + asmp->numParityFailed == 1) { 830 831 /* if under recon & already reconstructed, redirect 832 * the access to the spare drive and eliminate the 833 * failure indication */ 834 failedPDA = asmp->failedPDAs[0]; 835 fcol = failedPDA->col; 836 rstat = raidPtr->status; 837 prior_recon = (rstat == rf_rs_reconfigured) || ( 838 (rstat == rf_rs_reconstructing) ? 839 rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, failedPDA->startSector) : 0 840 ); 841 if (prior_recon) { 842 RF_RowCol_t oc = failedPDA->col; 843 RF_SectorNum_t oo = failedPDA->startSector; 844 if (layoutPtr->map->flags & 845 RF_DISTRIBUTE_SPARE) { 846 /* redirect to dist spare space */ 847 848 if (failedPDA == asmp->parityInfo) { 849 850 /* parity has failed */ 851 (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, 852 &failedPDA->col, &failedPDA->startSector, RF_REMAP); 853 854 if (asmp->parityInfo->next) { /* redir 2nd component, 855 * if any */ 856 RF_PhysDiskAddr_t *p = asmp->parityInfo->next; 857 RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; 858 p->col = failedPDA->col; 859 p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + 860 SUoffs; /* cheating: 861 * startSector is not 862 * really a RAID address */ 863 } 864 } else 865 if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { 866 RF_ASSERT(0); /* should not ever 867 * happen */ 868 } else { 869 870 /* data has failed */ 871 (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, 872 &failedPDA->col, &failedPDA->startSector, RF_REMAP); 873 874 } 875 876 } else { 877 /* redirect to dedicated spare space */ 878 879 failedPDA->col = raidPtr->Disks[fcol].spareCol; 880 881 /* the parity may have two distinct 882 * components, both of which may need 883 * to be redirected */ 884 if (asmp->parityInfo->next) { 885 if (failedPDA == asmp->parityInfo) { 886 failedPDA->next->col = failedPDA->col; 887 } else 888 if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ 889 asmp->parityInfo->col = failedPDA->col; 890 } 891 } 892 } 893 894 RF_ASSERT(failedPDA->col != -1); 895 896 if (rf_dagDebug || rf_mapDebug) { 897 printf("raid%d: Redirected type '%c' c %d o %ld -> c %d o %ld\n", 898 raidPtr->raidid, type, oc, (long) oo, failedPDA->col, (long) failedPDA->startSector); 899 } 900 asmp->numDataFailed = asmp->numParityFailed = 0; 901 } 902 } 903 if (type == RF_IO_TYPE_READ) { 904 905 if (asmp->numDataFailed == 0) 906 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; 907 else 908 *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; 909 910 } else { 911 912 913 /* if mirroring, always use large writes. If the access 914 * requires two distinct parity updates, always do a small 915 * write. If the stripe contains a failure but the access 916 * does not, do a small write. The first conditional 917 * (numStripeUnitsAccessed <= numDataCol/2) uses a 918 * less-than-or-equal rather than just a less-than because 919 * when G is 3 or 4, numDataCol/2 is 1, and I want 920 * single-stripe-unit updates to use just one disk. */ 921 if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { 922 if (((asmp->numStripeUnitsAccessed <= 923 (layoutPtr->numDataCol / 2)) && 924 (layoutPtr->numDataCol != 1)) || 925 (asmp->parityInfo->next != NULL) || 926 rf_CheckStripeForFailures(raidPtr, asmp)) { 927 *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; 928 } else 929 *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; 930 } else 931 if (asmp->numParityFailed == 1) 932 *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; 933 else 934 if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) 935 *createFunc = NULL; 936 else 937 *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; 938 } 939} 940#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 941