/* $NetBSD: rf_paritylog.c,v 1.20 2019/10/10 03:43:59 christos Exp $ */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
27131476Spjd */ 28131476Spjd 29131476Spjd/* Code for manipulating in-core parity logs 30131476Spjd * 31131476Spjd */ 32131476Spjd 33131476Spjd#include <sys/cdefs.h> 34131476Spjd__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.20 2019/10/10 03:43:59 christos Exp $"); 35131476Spjd 36131476Spjd#include "rf_archs.h" 37131476Spjd 38161246Spjd#if RF_INCLUDE_PARITYLOGGING > 0 39131476Spjd 40131476Spjd/* 41161246Spjd * Append-only log for recording parity "update" and "overwrite" records 42131476Spjd */ 43131476Spjd 44131476Spjd#include <dev/raidframe/raidframevar.h> 45131476Spjd 46131476Spjd#include "rf_threadstuff.h" 47161246Spjd#include "rf_mcpair.h" 48161246Spjd#include "rf_raid.h" 49161246Spjd#include "rf_dag.h" 50161246Spjd#include "rf_dagfuncs.h" 51161246Spjd#include "rf_desc.h" 52131476Spjd#include "rf_layout.h" 53131476Spjd#include "rf_diskqueue.h" 54131476Spjd#include "rf_etimer.h" 55161246Spjd#include "rf_paritylog.h" 56161246Spjd#include "rf_general.h" 57161246Spjd#include "rf_map.h" 58131476Spjd#include "rf_paritylogging.h" 59161246Spjd#include "rf_paritylogDiskMgr.h" 60161246Spjd 61161246Spjdstatic RF_CommonLogData_t * 62161246SpjdAllocParityLogCommonData(RF_Raid_t * raidPtr) 63161246Spjd{ 64161246Spjd RF_CommonLogData_t *common = NULL; 65161246Spjd 66161246Spjd /* Return a struct for holding common parity log information from the 67161246Spjd * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 68161246Spjd * is empty, call RF_Malloc to create a new structure. 
NON-BLOCKING */ 69131476Spjd 70161246Spjd rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 71161246Spjd if (raidPtr->parityLogDiskQueue.freeCommonList) { 72161246Spjd common = raidPtr->parityLogDiskQueue.freeCommonList; 73161246Spjd raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 74161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 75161246Spjd } else { 76161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 77161246Spjd common = RF_Malloc(sizeof(*common)); 78161246Spjd /* destroy is in rf_paritylogging.c */ 79161246Spjd rf_init_mutex2(common->mutex, IPL_VM); 80131476Spjd } 81161246Spjd common->next = NULL; 82161246Spjd return (common); 83161246Spjd} 84161246Spjd 85161246Spjdstatic void 86161246SpjdFreeParityLogCommonData(RF_CommonLogData_t * common) 87161246Spjd{ 88161246Spjd RF_Raid_t *raidPtr; 89161246Spjd 90161246Spjd /* Insert a single struct for holding parity log information (data) 91161246Spjd * into the free list (rf_parityLogDiskQueue.freeCommonList). 92161246Spjd * NON-BLOCKING */ 93161246Spjd 94161246Spjd raidPtr = common->raidPtr; 95161246Spjd rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 96161246Spjd common->next = raidPtr->parityLogDiskQueue.freeCommonList; 97161246Spjd raidPtr->parityLogDiskQueue.freeCommonList = common; 98161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 99161246Spjd} 100161246Spjd 101161246Spjdstatic RF_ParityLogData_t * 102161246SpjdAllocParityLogData(RF_Raid_t * raidPtr) 103161246Spjd{ 104131476Spjd RF_ParityLogData_t *data = NULL; 105161246Spjd 106161246Spjd /* Return a struct for holding parity log information from the free 107161246Spjd * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 108161246Spjd * call RF_Malloc to create a new structure. 
NON-BLOCKING */ 109161246Spjd 110161246Spjd rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 111161246Spjd if (raidPtr->parityLogDiskQueue.freeDataList) { 112161246Spjd data = raidPtr->parityLogDiskQueue.freeDataList; 113161246Spjd raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 114161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 115161246Spjd } else { 116161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 117161246Spjd data = RF_Malloc(sizeof(*data)); 118161246Spjd } 119161246Spjd data->next = NULL; 120161246Spjd data->prev = NULL; 121161246Spjd return (data); 122161246Spjd} 123161246Spjd 124161246Spjd 125161246Spjdstatic void 126161246SpjdFreeParityLogData(RF_ParityLogData_t * data) 127161246Spjd{ 128161246Spjd RF_ParityLogData_t *nextItem; 129131476Spjd RF_Raid_t *raidPtr; 130161246Spjd 131161246Spjd /* Insert a linked list of structs for holding parity log information 132161246Spjd * (data) into the free list (parityLogDiskQueue.freeList). 133161246Spjd * NON-BLOCKING */ 134161246Spjd 135161246Spjd raidPtr = data->common->raidPtr; 136161246Spjd rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 137161246Spjd while (data) { 138161246Spjd nextItem = data->next; 139161246Spjd data->next = raidPtr->parityLogDiskQueue.freeDataList; 140161246Spjd raidPtr->parityLogDiskQueue.freeDataList = data; 141161246Spjd data = nextItem; 142161246Spjd } 143161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 144161246Spjd} 145161246Spjd 146161246Spjd 147161246Spjdstatic void 148161246SpjdEnqueueParityLogData( 149161246Spjd RF_ParityLogData_t * data, 150161246Spjd RF_ParityLogData_t ** head, 151161246Spjd RF_ParityLogData_t ** tail) 152161246Spjd{ 153161246Spjd RF_Raid_t *raidPtr; 154161246Spjd 155161246Spjd /* Insert an in-core parity log (*data) into the head of a disk queue 156161246Spjd * (*head, *tail). 
NON-BLOCKING */ 157161246Spjd 158161246Spjd raidPtr = data->common->raidPtr; 159161246Spjd if (rf_parityLogDebug) 160161246Spjd printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 161161246Spjd RF_ASSERT(data->prev == NULL); 162161246Spjd RF_ASSERT(data->next == NULL); 163161246Spjd rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 164161246Spjd if (*head) { 165161246Spjd /* insert into head of queue */ 166161246Spjd RF_ASSERT((*head)->prev == NULL); 167161246Spjd RF_ASSERT((*tail)->next == NULL); 168161246Spjd data->next = *head; 169161246Spjd (*head)->prev = data; 170161246Spjd *head = data; 171161246Spjd } else { 172161246Spjd /* insert into empty list */ 173161246Spjd RF_ASSERT(*head == NULL); 174161246Spjd RF_ASSERT(*tail == NULL); 175161246Spjd *head = data; 176161246Spjd *tail = data; 177161246Spjd } 178161246Spjd RF_ASSERT((*head)->prev == NULL); 179161246Spjd RF_ASSERT((*tail)->next == NULL); 180161246Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 181161246Spjd} 182161246Spjd 183161246Spjdstatic RF_ParityLogData_t * 184161246SpjdDequeueParityLogData( 185161246Spjd RF_Raid_t * raidPtr, 186161246Spjd RF_ParityLogData_t ** head, 187161246Spjd RF_ParityLogData_t ** tail, 188161246Spjd int ignoreLocks) 189161246Spjd{ 190161246Spjd RF_ParityLogData_t *data; 191161246Spjd 192161246Spjd /* Remove and return an in-core parity log from the tail of a disk 193161246Spjd * queue (*head, *tail). 
NON-BLOCKING */ 194161246Spjd 195161246Spjd /* remove from tail, preserving FIFO order */ 196131476Spjd if (!ignoreLocks) 197161246Spjd rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 198161246Spjd data = *tail; 199161246Spjd if (data) { 200131476Spjd if (*head == *tail) { 201161246Spjd /* removing last item from queue */ 202161246Spjd *head = NULL; 203131476Spjd *tail = NULL; 204131476Spjd } else { 205131476Spjd *tail = (*tail)->prev; 206131476Spjd (*tail)->next = NULL; 207131476Spjd RF_ASSERT((*head)->prev == NULL); 208131476Spjd RF_ASSERT((*tail)->next == NULL); 209131476Spjd } 210131476Spjd data->next = NULL; 211161246Spjd data->prev = NULL; 212161246Spjd if (rf_parityLogDebug) 213161246Spjd printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 214161246Spjd } 215161246Spjd if (*head) { 216161246Spjd RF_ASSERT((*head)->prev == NULL); 217131476Spjd RF_ASSERT((*tail)->next == NULL); 218131476Spjd } 219131476Spjd if (!ignoreLocks) 220131476Spjd rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 221131476Spjd return (data); 222131476Spjd} 223 224 225static void 226RequeueParityLogData( 227 RF_ParityLogData_t * data, 228 RF_ParityLogData_t ** head, 229 RF_ParityLogData_t ** tail) 230{ 231 RF_Raid_t *raidPtr; 232 233 /* Insert an in-core parity log (*data) into the tail of a disk queue 234 * (*head, *tail). 
NON-BLOCKING */ 235 236 raidPtr = data->common->raidPtr; 237 RF_ASSERT(data); 238 if (rf_parityLogDebug) 239 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 240 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 241 if (*tail) { 242 /* append to tail of list */ 243 data->prev = *tail; 244 data->next = NULL; 245 (*tail)->next = data; 246 *tail = data; 247 } else { 248 /* inserting into an empty list */ 249 *head = data; 250 *tail = data; 251 (*head)->prev = NULL; 252 (*tail)->next = NULL; 253 } 254 RF_ASSERT((*head)->prev == NULL); 255 RF_ASSERT((*tail)->next == NULL); 256 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 257} 258 259RF_ParityLogData_t * 260rf_CreateParityLogData( 261 RF_ParityRecordType_t operation, 262 RF_PhysDiskAddr_t * pda, 263 void *bufPtr, 264 RF_Raid_t * raidPtr, 265 void (*wakeFunc)(void *, int), 266 void *wakeArg, 267 RF_AccTraceEntry_t * tracerec, 268 RF_Etimer_t startTime) 269{ 270 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 271 RF_CommonLogData_t *common; 272 RF_PhysDiskAddr_t *diskAddress; 273 int boundary, offset = 0; 274 275 /* Return an initialized struct of info to be logged. Build one item 276 * per physical disk address, one item per region. 
277 * 278 * NON-BLOCKING */ 279 280 diskAddress = pda; 281 common = AllocParityLogCommonData(raidPtr); 282 RF_ASSERT(common); 283 284 common->operation = operation; 285 common->bufPtr = bufPtr; 286 common->raidPtr = raidPtr; 287 common->wakeFunc = wakeFunc; 288 common->wakeArg = wakeArg; 289 common->tracerec = tracerec; 290 common->startTime = startTime; 291 common->cnt = 0; 292 293 if (rf_parityLogDebug) 294 printf("[entering CreateParityLogData]\n"); 295 while (diskAddress) { 296 common->cnt++; 297 data = AllocParityLogData(raidPtr); 298 RF_ASSERT(data); 299 data->common = common; 300 data->next = NULL; 301 data->prev = NULL; 302 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 303 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 304 /* disk address does not cross a region boundary */ 305 data->diskAddress = *diskAddress; 306 data->bufOffset = offset; 307 offset = offset + diskAddress->numSector; 308 EnqueueParityLogData(data, &resultHead, &resultTail); 309 /* adjust disk address */ 310 diskAddress = diskAddress->next; 311 } else { 312 /* disk address crosses a region boundary */ 313 /* find address where region is crossed */ 314 boundary = 0; 315 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 316 boundary++; 317 318 /* enter data before the boundary */ 319 data->diskAddress = *diskAddress; 320 data->diskAddress.numSector = boundary; 321 data->bufOffset = offset; 322 offset += boundary; 323 EnqueueParityLogData(data, &resultHead, &resultTail); 324 /* adjust disk address */ 325 diskAddress->startSector += boundary; 326 diskAddress->numSector -= boundary; 327 } 328 } 329 if (rf_parityLogDebug) 330 printf("[leaving CreateParityLogData]\n"); 331 return (resultHead); 332} 333 334 335RF_ParityLogData_t * 336rf_SearchAndDequeueParityLogData( 337 RF_Raid_t * raidPtr, 338 int regionID, 339 RF_ParityLogData_t ** head, 340 
RF_ParityLogData_t ** tail, 341 int ignoreLocks) 342{ 343 RF_ParityLogData_t *w; 344 345 /* Remove and return an in-core parity log from a specified region 346 * (regionID). If a matching log is not found, return NULL. 347 * 348 * NON-BLOCKING. */ 349 350 /* walk backward through a list, looking for an entry with a matching 351 * region ID */ 352 if (!ignoreLocks) 353 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 354 w = (*tail); 355 while (w) { 356 if (w->regionID == regionID) { 357 /* remove an element from the list */ 358 if (w == *tail) { 359 if (*head == *tail) { 360 /* removing only element in the list */ 361 *head = NULL; 362 *tail = NULL; 363 } else { 364 /* removing last item in the list */ 365 *tail = (*tail)->prev; 366 (*tail)->next = NULL; 367 RF_ASSERT((*head)->prev == NULL); 368 RF_ASSERT((*tail)->next == NULL); 369 } 370 } else { 371 if (w == *head) { 372 /* removing first item in the list */ 373 *head = (*head)->next; 374 (*head)->prev = NULL; 375 RF_ASSERT((*head)->prev == NULL); 376 RF_ASSERT((*tail)->next == NULL); 377 } else { 378 /* removing an item from the middle of 379 * the list */ 380 w->prev->next = w->next; 381 w->next->prev = w->prev; 382 RF_ASSERT((*head)->prev == NULL); 383 RF_ASSERT((*tail)->next == NULL); 384 } 385 } 386 w->prev = NULL; 387 w->next = NULL; 388 if (rf_parityLogDebug) 389 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 390 return (w); 391 } else 392 w = w->prev; 393 } 394 if (!ignoreLocks) 395 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 396 return (NULL); 397} 398 399static RF_ParityLogData_t * 400DequeueMatchingLogData( 401 RF_Raid_t * raidPtr, 402 RF_ParityLogData_t ** head, 403 RF_ParityLogData_t ** tail) 404{ 405 RF_ParityLogData_t *logDataList, *logData; 406 int regionID; 407 408 /* Remove and return an in-core parity log from the tail of a disk 409 * queue (*head, *tail). 
Then remove all matching (identical 410 * regionIDs) logData and return as a linked list. 411 * 412 * NON-BLOCKING */ 413 414 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 415 if (logDataList) { 416 regionID = logDataList->regionID; 417 logData = logDataList; 418 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 419 while (logData->next) { 420 logData = logData->next; 421 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 422 } 423 } 424 return (logDataList); 425} 426 427 428static RF_ParityLog_t * 429AcquireParityLog( 430 RF_ParityLogData_t * logData, 431 int finish) 432{ 433 RF_ParityLog_t *log = NULL; 434 RF_Raid_t *raidPtr; 435 436 /* Grab a log buffer from the pool and return it. If no buffers are 437 * available, return NULL. NON-BLOCKING */ 438 raidPtr = logData->common->raidPtr; 439 rf_lock_mutex2(raidPtr->parityLogPool.mutex); 440 if (raidPtr->parityLogPool.parityLogs) { 441 log = raidPtr->parityLogPool.parityLogs; 442 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 443 log->regionID = logData->regionID; 444 log->numRecords = 0; 445 log->next = NULL; 446 raidPtr->logsInUse++; 447 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 448 } else { 449 /* no logs available, so place ourselves on the queue of work 450 * waiting on log buffers this is done while 451 * parityLogPool.mutex is held, to ensure synchronization with 452 * ReleaseParityLogs. 
*/ 453 if (rf_parityLogDebug) 454 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 455 if (finish) 456 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 457 else 458 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 459 } 460 rf_unlock_mutex2(raidPtr->parityLogPool.mutex); 461 return (log); 462} 463 464void 465rf_ReleaseParityLogs( 466 RF_Raid_t * raidPtr, 467 RF_ParityLog_t * firstLog) 468{ 469 RF_ParityLogData_t *logDataList; 470 RF_ParityLog_t *log, *lastLog; 471 int cnt; 472 473 /* Insert a linked list of parity logs (firstLog) to the free list 474 * (parityLogPool.parityLogPool) 475 * 476 * NON-BLOCKING. */ 477 478 RF_ASSERT(firstLog); 479 480 /* Before returning logs to global free list, service all requests 481 * which are blocked on logs. Holding mutexes for parityLogPool and 482 * parityLogDiskQueue forces synchronization with AcquireParityLog(). 
*/ 483 rf_lock_mutex2(raidPtr->parityLogPool.mutex); 484 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 485 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 486 log = firstLog; 487 if (firstLog) 488 firstLog = firstLog->next; 489 log->numRecords = 0; 490 log->next = NULL; 491 while (logDataList && log) { 492 rf_unlock_mutex2(raidPtr->parityLogPool.mutex); 493 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 494 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 495 if (rf_parityLogDebug) 496 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 497 if (log == NULL) { 498 log = firstLog; 499 if (firstLog) { 500 firstLog = firstLog->next; 501 log->numRecords = 0; 502 log->next = NULL; 503 } 504 } 505 rf_lock_mutex2(raidPtr->parityLogPool.mutex); 506 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 507 if (log) 508 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 509 } 510 /* return remaining logs to pool */ 511 if (log) { 512 log->next = firstLog; 513 firstLog = log; 514 } 515 if (firstLog) { 516 lastLog = firstLog; 517 raidPtr->logsInUse--; 518 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 519 while (lastLog->next) { 520 lastLog = lastLog->next; 521 raidPtr->logsInUse--; 522 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 523 } 524 lastLog->next = raidPtr->parityLogPool.parityLogs; 525 raidPtr->parityLogPool.parityLogs = firstLog; 526 cnt = 0; 527 log = raidPtr->parityLogPool.parityLogs; 528 while (log) { 529 cnt++; 530 log = log->next; 531 } 532 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 533 } 534 rf_unlock_mutex2(raidPtr->parityLogPool.mutex); 535 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 536} 537 538static void 539ReintLog( 540 RF_Raid_t * raidPtr, 
541 int regionID, 542 RF_ParityLog_t * log) 543{ 544 RF_ASSERT(log); 545 546 /* Insert an in-core parity log (log) into the disk queue of 547 * reintegration work. Set the flag (reintInProgress) for the 548 * specified region (regionID) to indicate that reintegration is in 549 * progress for this region. NON-BLOCKING */ 550 551 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 552 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 553 * complete */ 554 555 if (rf_parityLogDebug) 556 printf("[requesting reintegration of region %d]\n", log->regionID); 557 /* move record to reintegration queue */ 558 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 559 log->next = raidPtr->parityLogDiskQueue.reintQueue; 560 raidPtr->parityLogDiskQueue.reintQueue = log; 561 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 562 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 563 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 564} 565 566static void 567FlushLog( 568 RF_Raid_t * raidPtr, 569 RF_ParityLog_t * log) 570{ 571 /* insert a core log (log) into a list of logs 572 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 573 * NON-BLOCKING */ 574 575 RF_ASSERT(log); 576 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 577 RF_ASSERT(log->next == NULL); 578 /* move log to flush queue */ 579 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 580 log->next = raidPtr->parityLogDiskQueue.flushQueue; 581 raidPtr->parityLogDiskQueue.flushQueue = log; 582 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 583 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 584} 585 586static int 587DumpParityLogToDisk( 588 int finish, 589 RF_ParityLogData_t * logData) 590{ 591 int i, diskCount, regionID = logData->regionID; 592 RF_ParityLog_t *log; 593 RF_Raid_t *raidPtr; 594 595 raidPtr = logData->common->raidPtr; 596 597 /* Move a core log to disk. If the log disk is full, initiate 598 * reintegration. 
599 * 600 * Return (0) if we can enqueue the dump immediately, otherwise return 601 * (1) to indicate we are blocked on reintegration and control of the 602 * thread should be relinquished. 603 * 604 * Caller must hold regionInfo[regionID].mutex 605 * 606 * NON-BLOCKING */ 607 608 RF_ASSERT(rf_owned_mutex2(raidPtr->regionInfo[regionID].mutex)); 609 610 if (rf_parityLogDebug) 611 printf("[dumping parity log to disk, region %d]\n", regionID); 612 log = raidPtr->regionInfo[regionID].coreLog; 613 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 614 RF_ASSERT(log->next == NULL); 615 616 /* if reintegration is in progress, must queue work */ 617 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 618 if (raidPtr->regionInfo[regionID].reintInProgress) { 619 /* Can not proceed since this region is currently being 620 * reintegrated. We can not block, so queue remaining work and 621 * return */ 622 if (rf_parityLogDebug) 623 printf("[region %d waiting on reintegration]\n", regionID); 624 /* XXX not sure about the use of finish - shouldn't this 625 * always be "Enqueue"? */ 626 if (finish) 627 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 628 else 629 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 630 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 631 return (1); /* relenquish control of this thread */ 632 } 633 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 634 raidPtr->regionInfo[regionID].coreLog = NULL; 635 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 636 /* IMPORTANT!! 
this loop bound assumes region disk holds an 637 * integral number of core logs */ 638 { 639 /* update disk map for this region */ 640 diskCount = raidPtr->regionInfo[regionID].diskCount; 641 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 642 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 643 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 644 } 645 log->diskOffset = diskCount; 646 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 647 FlushLog(raidPtr, log); 648 } else { 649 /* no room for log on disk, send it to disk manager and 650 * request reintegration */ 651 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 652 ReintLog(raidPtr, regionID, log); 653 } 654 if (rf_parityLogDebug) 655 printf("[finished dumping parity log to disk, region %d]\n", regionID); 656 return (0); 657} 658 659int 660rf_ParityLogAppend( 661 RF_ParityLogData_t * logData, 662 int finish, 663 RF_ParityLog_t ** incomingLog, 664 int clearReintFlag) 665{ 666 int regionID, logItem, itemDone; 667 RF_ParityLogData_t *item; 668 int punt, done = RF_FALSE; 669 RF_ParityLog_t *log; 670 RF_Raid_t *raidPtr; 671 RF_Etimer_t timer; 672 void (*wakeFunc) (void *, int); 673 void *wakeArg; 674 675 /* Add parity to the appropriate log, one sector at a time. This 676 * routine is called is called by dag functions ParityLogUpdateFunc 677 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 678 * 679 * Parity to be logged is contained in a linked-list (logData). When 680 * this routine returns, every sector in the list will be in one of 681 * three places: 1) entered into the parity log 2) queued, waiting on 682 * reintegration 3) queued, waiting on a core log 683 * 684 * Blocked work is passed to the ParityLoggingDiskManager for completion. 
685 * Later, as conditions which required the block are removed, the work 686 * reenters this routine with the "finish" parameter set to "RF_TRUE." 687 * 688 * NON-BLOCKING */ 689 690 raidPtr = logData->common->raidPtr; 691 /* lock the region for the first item in logData */ 692 RF_ASSERT(logData != NULL); 693 regionID = logData->regionID; 694 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 695 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 696 697 if (clearReintFlag) { 698 /* Enable flushing for this region. Holding both locks 699 * provides a synchronization barrier with DumpParityLogToDisk */ 700 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 701 /* XXXmrg need this? */ 702 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 703 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 704 raidPtr->regionInfo[regionID].diskCount = 0; 705 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 706 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 707 * enabled */ 708 /* XXXmrg need this? */ 709 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 710 } 711 /* process each item in logData */ 712 while (logData) { 713 /* remove an item from logData */ 714 item = logData; 715 logData = logData->next; 716 item->next = NULL; 717 item->prev = NULL; 718 719 if (rf_parityLogDebug) 720 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 721 722 /* see if we moved to a new region */ 723 if (regionID != item->regionID) { 724 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 725 regionID = item->regionID; 726 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 727 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 728 } 729 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. 
This 730 * can happen in one of two ways: 1) no core 731 * log (AcquireParityLog) 2) waiting on 732 * reintegration (DumpParityLogToDisk) If punt 733 * is RF_TRUE, the dataItem was queued, so 734 * skip to next item. */ 735 736 /* process item, one sector at a time, until all sectors 737 * processed or we punt */ 738 if (item->diskAddress.numSector > 0) 739 done = RF_FALSE; 740 else 741 RF_ASSERT(0); 742 while (!punt && !done) { 743 /* verify that a core log exists for this region */ 744 if (!raidPtr->regionInfo[regionID].coreLog) { 745 /* Attempt to acquire a parity log. If 746 * acquisition fails, queue remaining work in 747 * data item and move to nextItem. */ 748 if (incomingLog) 749 if (*incomingLog) { 750 RF_ASSERT((*incomingLog)->next == NULL); 751 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 752 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 753 *incomingLog = NULL; 754 } else 755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 756 else 757 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 758 /* Note: AcquireParityLog either returns a log 759 * or enqueues currentItem */ 760 } 761 if (!raidPtr->regionInfo[regionID].coreLog) 762 punt = RF_TRUE; /* failed to find a core log */ 763 else { 764 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 765 /* verify that the log has room for new 766 * entries */ 767 /* if log is full, dump it to disk and grab a 768 * new log */ 769 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 770 /* log is full, dump it to disk */ 771 if (DumpParityLogToDisk(finish, item)) 772 punt = RF_TRUE; /* dump unsuccessful, 773 * blocked on 774 * reintegration */ 775 else { 776 /* dump was successful */ 777 if (incomingLog) 778 if (*incomingLog) { 779 RF_ASSERT((*incomingLog)->next == NULL); 780 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 781 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 782 
*incomingLog = NULL; 783 } else 784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 785 else 786 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 787 /* if a core log is not 788 * available, must queue work 789 * and return */ 790 if (!raidPtr->regionInfo[regionID].coreLog) 791 punt = RF_TRUE; /* blocked on log 792 * availability */ 793 } 794 } 795 } 796 /* if we didn't punt on this item, attempt to add a 797 * sector to the core log */ 798 if (!punt) { 799 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 800 /* at this point, we have a core log with 801 * enough room for a sector */ 802 /* copy a sector into the log */ 803 log = raidPtr->regionInfo[regionID].coreLog; 804 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 805 logItem = log->numRecords++; 806 log->records[logItem].parityAddr = item->diskAddress; 807 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 808 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 809 log->records[logItem].parityAddr.numSector = 1; 810 log->records[logItem].operation = item->common->operation; 811 memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector)); 812 item->diskAddress.numSector--; 813 item->diskAddress.startSector++; 814 if (item->diskAddress.numSector == 0) 815 done = RF_TRUE; 816 } 817 } 818 819 if (!punt) { 820 /* Processed this item completely, decrement count of 821 * items to be processed. 
*/ 822 RF_ASSERT(item->diskAddress.numSector == 0); 823 rf_lock_mutex2(item->common->mutex); 824 item->common->cnt--; 825 if (item->common->cnt == 0) 826 itemDone = RF_TRUE; 827 else 828 itemDone = RF_FALSE; 829 rf_unlock_mutex2(item->common->mutex); 830 if (itemDone) { 831 /* Finished processing all log data for this 832 * IO Return structs to free list and invoke 833 * wakeup function. */ 834 timer = item->common->startTime; /* grab initial value of 835 * timer */ 836 RF_ETIMER_STOP(timer); 837 RF_ETIMER_EVAL(timer); 838 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 839 if (rf_parityLogDebug) 840 printf("[waking process for region %d]\n", item->regionID); 841 wakeFunc = item->common->wakeFunc; 842 wakeArg = item->common->wakeArg; 843 FreeParityLogCommonData(item->common); 844 FreeParityLogData(item); 845 (wakeFunc) (wakeArg, 0); 846 } else 847 FreeParityLogData(item); 848 } 849 } 850 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 851 if (rf_parityLogDebug) 852 printf("[exiting ParityLogAppend]\n"); 853 return (0); 854} 855 856 857void 858rf_EnableParityLogging(RF_Raid_t * raidPtr) 859{ 860 int regionID; 861 862 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 863 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 864 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 865 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 866 } 867 if (rf_parityLogDebug) 868 printf("[parity logging enabled]\n"); 869} 870#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 871