/* rf_dagdegrd.c revision 1.32 */
1/* $NetBSD: rf_dagdegrd.c,v 1.32 2021/07/23 00:54:45 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * rf_dagdegrd.c 31 * 32 * code for creating degraded read DAGs 33 */ 34 35#include <sys/cdefs.h> 36__KERNEL_RCSID(0, "$NetBSD: rf_dagdegrd.c,v 1.32 2021/07/23 00:54:45 oster Exp $"); 37 38#include <dev/raidframe/raidframevar.h> 39 40#include "rf_archs.h" 41#include "rf_raid.h" 42#include "rf_dag.h" 43#include "rf_dagutils.h" 44#include "rf_dagfuncs.h" 45#include "rf_debugMem.h" 46#include "rf_general.h" 47#include "rf_dagdegrd.h" 48#include "rf_map.h" 49 50 51/****************************************************************************** 52 * 53 * General comments on DAG creation: 54 * 55 * All DAGs in this file use roll-away error recovery. Each DAG has a single 56 * commit node, usually called "Cmt." 
If an error occurs before the Cmt node 57 * is reached, the execution engine will halt forward execution and work 58 * backward through the graph, executing the undo functions. Assuming that 59 * each node in the graph prior to the Cmt node are undoable and atomic - or - 60 * does not make changes to permanent state, the graph will fail atomically. 61 * If an error occurs after the Cmt node executes, the engine will roll-forward 62 * through the graph, blindly executing nodes until it reaches the end. 63 * If a graph reaches the end, it is assumed to have completed successfully. 64 * 65 * A graph has only 1 Cmt node. 66 * 67 */ 68 69 70/****************************************************************************** 71 * 72 * The following wrappers map the standard DAG creation interface to the 73 * DAG creation routines. Additionally, these wrappers enable experimentation 74 * with new DAG structures by providing an extra level of indirection, allowing 75 * the DAG creation routines to be replaced at this single point. 
76 */ 77 78void 79rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr, 80 RF_AccessStripeMap_t *asmap, 81 RF_DagHeader_t *dag_h, 82 void *bp, 83 RF_RaidAccessFlags_t flags, 84 RF_AllocListElem_t *allocList) 85{ 86 rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 87 &rf_xorRecoveryFuncs); 88} 89 90 91/****************************************************************************** 92 * 93 * DAG creation code begins here 94 */ 95 96 97/****************************************************************************** 98 * Create a degraded read DAG for RAID level 1 99 * 100 * Hdr -> Nil -> R(p/s)d -> Commit -> Trm 101 * 102 * The "Rd" node reads data from the surviving disk in the mirror pair 103 * Rpd - read of primary copy 104 * Rsd - read of secondary copy 105 * 106 * Parameters: raidPtr - description of the physical array 107 * asmap - logical & physical addresses for this access 108 * bp - buffer ptr (for holding write data) 109 * flags - general flags (e.g. disk locking) 110 * allocList - list of memory allocated in DAG creation 111 *****************************************************************************/ 112 113void 114rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr, 115 RF_AccessStripeMap_t *asmap, 116 RF_DagHeader_t *dag_h, 117 void *bp, 118 RF_RaidAccessFlags_t flags, 119 RF_AllocListElem_t *allocList) 120{ 121 RF_DagNode_t *rdNode, *blockNode, *commitNode, *termNode; 122 RF_StripeNum_t parityStripeID; 123 RF_ReconUnitNum_t which_ru; 124 RF_PhysDiskAddr_t *pda; 125 int useMirror; 126 127 useMirror = 0; 128 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 129 asmap->raidAddress, &which_ru); 130#if RF_DEBUG_DAG 131 if (rf_dagDebug) { 132 printf("[Creating RAID level 1 degraded read DAG]\n"); 133 } 134#endif 135 dag_h->creator = "RaidOneDegradedReadDAG"; 136 /* alloc the Wnd nodes and the Wmir node */ 137 if (asmap->numDataFailed == 0) 138 useMirror = RF_FALSE; 139 else 140 useMirror = RF_TRUE; 141 142 /* total number 
of nodes = 1 + (block + commit + terminator) */ 143 144 rdNode = rf_AllocDAGNode(raidPtr); 145 rdNode->list_next = dag_h->nodes; 146 dag_h->nodes = rdNode; 147 148 blockNode = rf_AllocDAGNode(raidPtr); 149 blockNode->list_next = dag_h->nodes; 150 dag_h->nodes = blockNode; 151 152 commitNode = rf_AllocDAGNode(raidPtr); 153 commitNode->list_next = dag_h->nodes; 154 dag_h->nodes = commitNode; 155 156 termNode = rf_AllocDAGNode(raidPtr); 157 termNode->list_next = dag_h->nodes; 158 dag_h->nodes = termNode; 159 160 /* this dag can not commit until the commit node is reached. errors 161 * prior to the commit point imply the dag has failed and must be 162 * retried */ 163 dag_h->numCommitNodes = 1; 164 dag_h->numCommits = 0; 165 dag_h->numSuccedents = 1; 166 167 /* initialize the block, commit, and terminator nodes */ 168 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 169 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 170 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 171 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 172 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 173 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 174 175 pda = asmap->physInfo; 176 RF_ASSERT(pda != NULL); 177 /* parityInfo must describe entire parity unit */ 178 RF_ASSERT(asmap->parityInfo->next == NULL); 179 180 /* initialize the data node */ 181 if (!useMirror) { 182 /* read primary copy of data */ 183 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 184 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 185 rdNode->params[0].p = pda; 186 rdNode->params[1].p = pda->bufPtr; 187 rdNode->params[2].v = parityStripeID; 188 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 189 which_ru); 190 } else { 191 /* read secondary copy of data */ 192 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 193 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", 
allocList); 194 rdNode->params[0].p = asmap->parityInfo; 195 rdNode->params[1].p = pda->bufPtr; 196 rdNode->params[2].v = parityStripeID; 197 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 198 which_ru); 199 } 200 201 /* connect header to block node */ 202 RF_ASSERT(dag_h->numSuccedents == 1); 203 RF_ASSERT(blockNode->numAntecedents == 0); 204 dag_h->succedents[0] = blockNode; 205 206 /* connect block node to rdnode */ 207 RF_ASSERT(blockNode->numSuccedents == 1); 208 RF_ASSERT(rdNode->numAntecedents == 1); 209 blockNode->succedents[0] = rdNode; 210 rdNode->antecedents[0] = blockNode; 211 rdNode->antType[0] = rf_control; 212 213 /* connect rdnode to commit node */ 214 RF_ASSERT(rdNode->numSuccedents == 1); 215 RF_ASSERT(commitNode->numAntecedents == 1); 216 rdNode->succedents[0] = commitNode; 217 commitNode->antecedents[0] = rdNode; 218 commitNode->antType[0] = rf_control; 219 220 /* connect commit node to terminator */ 221 RF_ASSERT(commitNode->numSuccedents == 1); 222 RF_ASSERT(termNode->numAntecedents == 1); 223 RF_ASSERT(termNode->numSuccedents == 0); 224 commitNode->succedents[0] = termNode; 225 termNode->antecedents[0] = commitNode; 226 termNode->antType[0] = rf_control; 227} 228 229 230 231/****************************************************************************** 232 * 233 * creates a DAG to perform a degraded-mode read of data within one stripe. 234 * This DAG is as follows: 235 * 236 * Hdr -> Block -> Rud -> Xor -> Cmt -> T 237 * -> Rrd -> 238 * -> Rp --> 239 * 240 * Each R node is a successor of the L node 241 * One successor arc from each R node goes to C, and the other to X 242 * There is one Rud for each chunk of surviving user data requested by the 243 * user, and one Rrd for each chunk of surviving user data _not_ being read by 244 * the user 245 * R = read, ud = user data, rd = recovery (surviving) data, p = parity 246 * X = XOR, C = Commit, T = terminate 247 * 248 * The block node guarantees a single source node. 
249 * 250 * Note: The target buffer for the XOR node is set to the actual user buffer 251 * where the failed data is supposed to end up. This buffer is zero'd by the 252 * code here. Thus, if you create a degraded read dag, use it, and then 253 * re-use, you have to be sure to zero the target buffer prior to the re-use. 254 * 255 * The recfunc argument at the end specifies the name and function used for 256 * the redundancy 257 * recovery function. 258 * 259 *****************************************************************************/ 260 261void 262rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 263 RF_DagHeader_t *dag_h, void *bp, 264 RF_RaidAccessFlags_t flags, 265 RF_AllocListElem_t *allocList, 266 const RF_RedFuncs_t *recFunc) 267{ 268 RF_DagNode_t *rudNodes, *rrdNodes, *xorNode, *blockNode; 269 RF_DagNode_t *commitNode, *rpNode, *termNode; 270 RF_DagNode_t *tmpNode, *tmprudNode, *tmprrdNode; 271 int nRrdNodes, nRudNodes, nXorBufs, i; 272 int j, paramNum; 273 RF_SectorCount_t sectorsPerSU; 274 RF_ReconUnitNum_t which_ru; 275 char overlappingPDAs[RF_MAXCOL];/* a temporary array of flags */ 276 RF_AccessStripeMapHeader_t *new_asm_h[2]; 277 RF_PhysDiskAddr_t *pda, *parityPDA; 278 RF_StripeNum_t parityStripeID; 279 RF_PhysDiskAddr_t *failedPDA; 280 RF_RaidLayout_t *layoutPtr; 281 char *rpBuf; 282 283 layoutPtr = &(raidPtr->Layout); 284 /* failedPDA points to the pda within the asm that targets the failed 285 * disk */ 286 failedPDA = asmap->failedPDAs[0]; 287 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 288 asmap->raidAddress, &which_ru); 289 sectorsPerSU = layoutPtr->sectorsPerStripeUnit; 290 291#if RF_DEBUG_DAG 292 if (rf_dagDebug) { 293 printf("[Creating degraded read DAG]\n"); 294 } 295#endif 296 RF_ASSERT(asmap->numDataFailed == 1); 297 dag_h->creator = "DegradedReadDAG"; 298 299 /* 300 * generate two ASMs identifying the surviving data we need 301 * in order to recover the lost data 302 */ 303 304 /* overlappingPDAs 
array must be zero'd */ 305 memset(overlappingPDAs, 0, RF_MAXCOL); 306 rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, 307 &rpBuf, overlappingPDAs, allocList); 308 309 /* 310 * create all the nodes at once 311 * 312 * -1 because no access is generated for the failed pda 313 */ 314 nRudNodes = asmap->numStripeUnitsAccessed - 1; 315 nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + 316 ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); 317 318 blockNode = rf_AllocDAGNode(raidPtr); 319 blockNode->list_next = dag_h->nodes; 320 dag_h->nodes = blockNode; 321 322 commitNode = rf_AllocDAGNode(raidPtr); 323 commitNode->list_next = dag_h->nodes; 324 dag_h->nodes = commitNode; 325 326 xorNode = rf_AllocDAGNode(raidPtr); 327 xorNode->list_next = dag_h->nodes; 328 dag_h->nodes = xorNode; 329 330 rpNode = rf_AllocDAGNode(raidPtr); 331 rpNode->list_next = dag_h->nodes; 332 dag_h->nodes = rpNode; 333 334 termNode = rf_AllocDAGNode(raidPtr); 335 termNode->list_next = dag_h->nodes; 336 dag_h->nodes = termNode; 337 338 for (i = 0; i < nRudNodes; i++) { 339 tmpNode = rf_AllocDAGNode(raidPtr); 340 tmpNode->list_next = dag_h->nodes; 341 dag_h->nodes = tmpNode; 342 } 343 rudNodes = dag_h->nodes; 344 345 for (i = 0; i < nRrdNodes; i++) { 346 tmpNode = rf_AllocDAGNode(raidPtr); 347 tmpNode->list_next = dag_h->nodes; 348 dag_h->nodes = tmpNode; 349 } 350 rrdNodes = dag_h->nodes; 351 352 /* initialize nodes */ 353 dag_h->numCommitNodes = 1; 354 dag_h->numCommits = 0; 355 /* this dag can not commit until the commit node is reached errors 356 * prior to the commit point imply the dag has failed */ 357 dag_h->numSuccedents = 1; 358 359 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 360 NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); 361 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 362 NULL, 1, 1, 0, 0, dag_h, "Cmt", 
allocList); 363 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 364 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 365 rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, 366 NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, 367 recFunc->SimpleName, allocList); 368 369 /* fill in the Rud nodes */ 370 tmprudNode = rudNodes; 371 for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { 372 if (pda == failedPDA) { 373 i--; 374 continue; 375 } 376 rf_InitNode(tmprudNode, rf_wait, RF_FALSE, rf_DiskReadFunc, 377 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 378 "Rud", allocList); 379 RF_ASSERT(pda); 380 tmprudNode->params[0].p = pda; 381 tmprudNode->params[1].p = pda->bufPtr; 382 tmprudNode->params[2].v = parityStripeID; 383 tmprudNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 384 tmprudNode = tmprudNode->list_next; 385 } 386 387 /* fill in the Rrd nodes */ 388 i = 0; 389 tmprrdNode = rrdNodes; 390 if (new_asm_h[0]) { 391 for (pda = new_asm_h[0]->stripeMap->physInfo; 392 i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; 393 i++, pda = pda->next) { 394 rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, 395 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 396 dag_h, "Rrd", allocList); 397 RF_ASSERT(pda); 398 tmprrdNode->params[0].p = pda; 399 tmprrdNode->params[1].p = pda->bufPtr; 400 tmprrdNode->params[2].v = parityStripeID; 401 tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 402 tmprrdNode = tmprrdNode->list_next; 403 } 404 } 405 if (new_asm_h[1]) { 406 /* tmprrdNode = rrdNodes; */ /* don't set this here -- old code was using i+j, which means 407 we need to just continue using tmprrdNode for the next 'j' elements. 
*/ 408 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; 409 j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; 410 j++, pda = pda->next) { 411 rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, 412 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 413 dag_h, "Rrd", allocList); 414 RF_ASSERT(pda); 415 tmprrdNode->params[0].p = pda; 416 tmprrdNode->params[1].p = pda->bufPtr; 417 tmprrdNode->params[2].v = parityStripeID; 418 tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 419 tmprrdNode = tmprrdNode->list_next; 420 } 421 } 422 /* make a PDA for the parity unit */ 423 parityPDA = rf_AllocPhysDiskAddr(raidPtr); 424 parityPDA->next = dag_h->pda_cleanup_list; 425 dag_h->pda_cleanup_list = parityPDA; 426 parityPDA->col = asmap->parityInfo->col; 427 parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) 428 * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); 429 parityPDA->numSector = failedPDA->numSector; 430 431 /* initialize the Rp node */ 432 rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 433 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); 434 rpNode->params[0].p = parityPDA; 435 rpNode->params[1].p = rpBuf; 436 rpNode->params[2].v = parityStripeID; 437 rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 438 439 /* 440 * the last and nastiest step is to assign all 441 * the parameters of the Xor node 442 */ 443 paramNum = 0; 444 tmprrdNode = rrdNodes; 445 for (i = 0; i < nRrdNodes; i++) { 446 /* all the Rrd nodes need to be xored together */ 447 xorNode->params[paramNum++] = tmprrdNode->params[0]; 448 xorNode->params[paramNum++] = tmprrdNode->params[1]; 449 tmprrdNode = tmprrdNode->list_next; 450 } 451 tmprudNode = rudNodes; 452 for (i = 0; i < nRudNodes; i++) { 453 /* any Rud nodes that overlap the failed access need to be 454 * xored in */ 455 if (overlappingPDAs[i]) { 456 pda = rf_AllocPhysDiskAddr(raidPtr); 457 memcpy((char *) 
pda, (char *) tmprudNode->params[0].p, sizeof(RF_PhysDiskAddr_t)); 458 /* add it into the pda_cleanup_list *after* the copy, TYVM */ 459 pda->next = dag_h->pda_cleanup_list; 460 dag_h->pda_cleanup_list = pda; 461 rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); 462 xorNode->params[paramNum++].p = pda; 463 xorNode->params[paramNum++].p = pda->bufPtr; 464 } 465 tmprudNode = tmprudNode->list_next; 466 } 467 468 /* install parity pda as last set of params to be xor'd */ 469 xorNode->params[paramNum++].p = parityPDA; 470 xorNode->params[paramNum++].p = rpBuf; 471 472 /* 473 * the last 2 params to the recovery xor node are 474 * the failed PDA and the raidPtr 475 */ 476 xorNode->params[paramNum++].p = failedPDA; 477 xorNode->params[paramNum++].p = raidPtr; 478 RF_ASSERT(paramNum == 2 * nXorBufs + 2); 479 480 /* 481 * The xor node uses results[0] as the target buffer. 482 * Set pointer and zero the buffer. In the kernel, this 483 * may be a user buffer in which case we have to remap it. 
484 */ 485 xorNode->results[0] = failedPDA->bufPtr; 486 memset(failedPDA->bufPtr, 0, rf_RaidAddressToByte(raidPtr, 487 failedPDA->numSector)); 488 489 /* connect nodes to form graph */ 490 /* connect the header to the block node */ 491 RF_ASSERT(dag_h->numSuccedents == 1); 492 RF_ASSERT(blockNode->numAntecedents == 0); 493 dag_h->succedents[0] = blockNode; 494 495 /* connect the block node to the read nodes */ 496 RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); 497 RF_ASSERT(rpNode->numAntecedents == 1); 498 blockNode->succedents[0] = rpNode; 499 rpNode->antecedents[0] = blockNode; 500 rpNode->antType[0] = rf_control; 501 tmprrdNode = rrdNodes; 502 for (i = 0; i < nRrdNodes; i++) { 503 RF_ASSERT(tmprrdNode->numSuccedents == 1); 504 blockNode->succedents[1 + i] = tmprrdNode; 505 tmprrdNode->antecedents[0] = blockNode; 506 tmprrdNode->antType[0] = rf_control; 507 tmprrdNode = tmprrdNode->list_next; 508 } 509 tmprudNode = rudNodes; 510 for (i = 0; i < nRudNodes; i++) { 511 RF_ASSERT(tmprudNode->numSuccedents == 1); 512 blockNode->succedents[1 + nRrdNodes + i] = tmprudNode; 513 tmprudNode->antecedents[0] = blockNode; 514 tmprudNode->antType[0] = rf_control; 515 tmprudNode = tmprudNode->list_next; 516 } 517 518 /* connect the read nodes to the xor node */ 519 RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); 520 RF_ASSERT(rpNode->numSuccedents == 1); 521 rpNode->succedents[0] = xorNode; 522 xorNode->antecedents[0] = rpNode; 523 xorNode->antType[0] = rf_trueData; 524 tmprrdNode = rrdNodes; 525 for (i = 0; i < nRrdNodes; i++) { 526 RF_ASSERT(tmprrdNode->numSuccedents == 1); 527 tmprrdNode->succedents[0] = xorNode; 528 xorNode->antecedents[1 + i] = tmprrdNode; 529 xorNode->antType[1 + i] = rf_trueData; 530 tmprrdNode = tmprrdNode->list_next; 531 } 532 tmprudNode = rudNodes; 533 for (i = 0; i < nRudNodes; i++) { 534 RF_ASSERT(tmprudNode->numSuccedents == 1); 535 tmprudNode->succedents[0] = xorNode; 536 xorNode->antecedents[1 + nRrdNodes 
+ i] = tmprudNode; 537 xorNode->antType[1 + nRrdNodes + i] = rf_trueData; 538 tmprudNode = tmprudNode->list_next; 539 } 540 541 /* connect the xor node to the commit node */ 542 RF_ASSERT(xorNode->numSuccedents == 1); 543 RF_ASSERT(commitNode->numAntecedents == 1); 544 xorNode->succedents[0] = commitNode; 545 commitNode->antecedents[0] = xorNode; 546 commitNode->antType[0] = rf_control; 547 548 /* connect the termNode to the commit node */ 549 RF_ASSERT(commitNode->numSuccedents == 1); 550 RF_ASSERT(termNode->numAntecedents == 1); 551 RF_ASSERT(termNode->numSuccedents == 0); 552 commitNode->succedents[0] = termNode; 553 termNode->antType[0] = rf_control; 554 termNode->antecedents[0] = commitNode; 555} 556 557#if (RF_INCLUDE_CHAINDECLUSTER > 0) 558/****************************************************************************** 559 * Create a degraded read DAG for Chained Declustering 560 * 561 * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm 562 * 563 * The "Rd" node reads data from the surviving disk in the mirror pair 564 * Rpd - read of primary copy 565 * Rsd - read of secondary copy 566 * 567 * Parameters: raidPtr - description of the physical array 568 * asmap - logical & physical addresses for this access 569 * bp - buffer ptr (for holding write data) 570 * flags - general flags (e.g. 
disk locking) 571 * allocList - list of memory allocated in DAG creation 572 *****************************************************************************/ 573 574void 575rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 576 RF_DagHeader_t *dag_h, void *bp, 577 RF_RaidAccessFlags_t flags, 578 RF_AllocListElem_t *allocList) 579{ 580 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; 581 RF_StripeNum_t parityStripeID; 582 int useMirror, i, shiftable; 583 RF_ReconUnitNum_t which_ru; 584 RF_PhysDiskAddr_t *pda; 585 586 if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { 587 shiftable = RF_TRUE; 588 } else { 589 shiftable = RF_FALSE; 590 } 591 useMirror = 0; 592 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 593 asmap->raidAddress, &which_ru); 594 595#if RF_DEBUG_DAG 596 if (rf_dagDebug) { 597 printf("[Creating RAID C degraded read DAG]\n"); 598 } 599#endif 600 dag_h->creator = "RaidCDegradedReadDAG"; 601 /* alloc the Wnd nodes and the Wmir node */ 602 if (asmap->numDataFailed == 0) 603 useMirror = RF_FALSE; 604 else 605 useMirror = RF_TRUE; 606 607 /* total number of nodes = 1 + (block + commit + terminator) */ 608 nodes = RF_MallocAndAdd(4 * sizeof(*nodes), allocList); 609 i = 0; 610 rdNode = &nodes[i]; 611 i++; 612 blockNode = &nodes[i]; 613 i++; 614 commitNode = &nodes[i]; 615 i++; 616 termNode = &nodes[i]; 617 i++; 618 619 /* 620 * This dag can not commit until the commit node is reached. 621 * Errors prior to the commit point imply the dag has failed 622 * and must be retried. 
623 */ 624 dag_h->numCommitNodes = 1; 625 dag_h->numCommits = 0; 626 dag_h->numSuccedents = 1; 627 628 /* initialize the block, commit, and terminator nodes */ 629 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 630 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 631 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 632 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 633 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 634 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 635 636 pda = asmap->physInfo; 637 RF_ASSERT(pda != NULL); 638 /* parityInfo must describe entire parity unit */ 639 RF_ASSERT(asmap->parityInfo->next == NULL); 640 641 /* initialize the data node */ 642 if (!useMirror) { 643 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 644 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 645 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { 646 /* shift this read to the next disk in line */ 647 rdNode->params[0].p = asmap->parityInfo; 648 rdNode->params[1].p = pda->bufPtr; 649 rdNode->params[2].v = parityStripeID; 650 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 651 } else { 652 /* read primary copy */ 653 rdNode->params[0].p = pda; 654 rdNode->params[1].p = pda->bufPtr; 655 rdNode->params[2].v = parityStripeID; 656 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 657 } 658 } else { 659 /* read secondary copy of data */ 660 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 661 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); 662 rdNode->params[0].p = asmap->parityInfo; 663 rdNode->params[1].p = pda->bufPtr; 664 rdNode->params[2].v = parityStripeID; 665 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 666 } 667 668 /* connect header to block node */ 669 RF_ASSERT(dag_h->numSuccedents == 1); 670 
RF_ASSERT(blockNode->numAntecedents == 0); 671 dag_h->succedents[0] = blockNode; 672 673 /* connect block node to rdnode */ 674 RF_ASSERT(blockNode->numSuccedents == 1); 675 RF_ASSERT(rdNode->numAntecedents == 1); 676 blockNode->succedents[0] = rdNode; 677 rdNode->antecedents[0] = blockNode; 678 rdNode->antType[0] = rf_control; 679 680 /* connect rdnode to commit node */ 681 RF_ASSERT(rdNode->numSuccedents == 1); 682 RF_ASSERT(commitNode->numAntecedents == 1); 683 rdNode->succedents[0] = commitNode; 684 commitNode->antecedents[0] = rdNode; 685 commitNode->antType[0] = rf_control; 686 687 /* connect commit node to terminator */ 688 RF_ASSERT(commitNode->numSuccedents == 1); 689 RF_ASSERT(termNode->numAntecedents == 1); 690 RF_ASSERT(termNode->numSuccedents == 0); 691 commitNode->succedents[0] = termNode; 692 termNode->antecedents[0] = commitNode; 693 termNode->antType[0] = rf_control; 694} 695#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ 696 697#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) 698/* 699 * XXX move this elsewhere? 700 */ 701void 702rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 703 RF_PhysDiskAddr_t **pdap, int *nNodep, 704 RF_PhysDiskAddr_t **pqpdap, int *nPQNodep, 705 RF_AllocListElem_t *allocList) 706{ 707 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 708 int PDAPerDisk, i; 709 RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; 710 int numDataCol = layoutPtr->numDataCol; 711 int state; 712 RF_SectorNum_t suoff, suend; 713 unsigned firstDataCol, napdas, count; 714 RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; 715 RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; 716 RF_PhysDiskAddr_t *pda_p; 717 RF_PhysDiskAddr_t *phys_p; 718 RF_RaidAddr_t sosAddr; 719 720 /* determine how many pda's we will have to generate per unaccess 721 * stripe. 
If there is only one failed data unit, it is one; if two, 722 * possibly two, depending whether they overlap. */ 723 724 fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); 725 fone_end = fone_start + fone->numSector; 726 727#define BUF_ALLOC(num) \ 728 RF_MallocAndAdd(rf_RaidAddressToByte(raidPtr, num), allocList) 729#define CONS_PDA(if,start,num) \ 730 pda_p->col = asmap->if->col; \ 731 pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ 732 pda_p->numSector = num; \ 733 pda_p->next = NULL; \ 734 pda_p->bufPtr = BUF_ALLOC(num) 735 736 if (asmap->numDataFailed == 1) { 737 PDAPerDisk = 1; 738 state = 1; 739 *pqpdap = RF_MallocAndAdd(2 * sizeof(**pqpdap), allocList); 740 pda_p = *pqpdap; 741 /* build p */ 742 CONS_PDA(parityInfo, fone_start, fone->numSector); 743 pda_p->type = RF_PDA_TYPE_PARITY; 744 pda_p++; 745 /* build q */ 746 CONS_PDA(qInfo, fone_start, fone->numSector); 747 pda_p->type = RF_PDA_TYPE_Q; 748 } else { 749 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); 750 ftwo_end = ftwo_start + ftwo->numSector; 751 if (fone->numSector + ftwo->numSector > secPerSU) { 752 PDAPerDisk = 1; 753 state = 2; 754 *pqpdap = RF_MallocAndAdd(2 * sizeof(**pqpdap), allocList); 755 pda_p = *pqpdap; 756 CONS_PDA(parityInfo, 0, secPerSU); 757 pda_p->type = RF_PDA_TYPE_PARITY; 758 pda_p++; 759 CONS_PDA(qInfo, 0, secPerSU); 760 pda_p->type = RF_PDA_TYPE_Q; 761 } else { 762 PDAPerDisk = 2; 763 state = 3; 764 /* four of them, fone, then ftwo */ 765 *pqpdap = RF_MallocAndAdd(4 * sizeof(**pqpdap), allocList); 766 pda_p = *pqpdap; 767 CONS_PDA(parityInfo, fone_start, fone->numSector); 768 pda_p->type = RF_PDA_TYPE_PARITY; 769 pda_p++; 770 CONS_PDA(qInfo, fone_start, fone->numSector); 771 pda_p->type = RF_PDA_TYPE_Q; 772 pda_p++; 773 CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); 774 pda_p->type = RF_PDA_TYPE_PARITY; 775 pda_p++; 776 CONS_PDA(qInfo, ftwo_start, ftwo->numSector); 777 pda_p->type = RF_PDA_TYPE_Q; 778 } 779 } 
780 /* figure out number of nonaccessed pda */ 781 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); 782 *nPQNodep = PDAPerDisk; 783 784 /* sweep over the over accessed pda's, figuring out the number of 785 * additional pda's to generate. Of course, skip the failed ones */ 786 787 count = 0; 788 for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { 789 if ((pda_p == fone) || (pda_p == ftwo)) 790 continue; 791 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); 792 suend = suoff + pda_p->numSector; 793 switch (state) { 794 case 1: /* one failed PDA to overlap */ 795 /* if a PDA doesn't contain the failed unit, it can 796 * only miss the start or end, not both */ 797 if ((suoff > fone_start) || (suend < fone_end)) 798 count++; 799 break; 800 case 2: /* whole stripe */ 801 if (suoff) /* leak at begining */ 802 count++; 803 if (suend < numDataCol) /* leak at end */ 804 count++; 805 break; 806 case 3: /* two disjoint units */ 807 if ((suoff > fone_start) || (suend < fone_end)) 808 count++; 809 if ((suoff > ftwo_start) || (suend < ftwo_end)) 810 count++; 811 break; 812 default: 813 RF_PANIC(); 814 } 815 } 816 817 napdas += count; 818 *nNodep = napdas; 819 if (napdas == 0) 820 return; /* short circuit */ 821 822 /* allocate up our list of pda's */ 823 824 pda_p = RF_MallocAndAdd(napdas * sizeof(*pdap), allocList); 825 *pdap = pda_p; 826 827 /* linkem together */ 828 for (i = 0; i < (napdas - 1); i++) 829 pda_p[i].next = pda_p + (i + 1); 830 831 /* march through the one's up to the first accessed disk */ 832 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; 833 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 834 for (i = 0; i < firstDataCol; i++) { 835 if ((pda_p - (*pdap)) == napdas) 836 continue; 837 pda_p->type = RF_PDA_TYPE_DATA; 838 pda_p->raidAddress = sosAddr + (i * secPerSU); 839 (raidPtr->Layout.map->MapSector) (raidPtr, 
pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 840 /* skip over dead disks */ 841 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 842 continue; 843 switch (state) { 844 case 1: /* fone */ 845 pda_p->numSector = fone->numSector; 846 pda_p->raidAddress += fone_start; 847 pda_p->startSector += fone_start; 848 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 849 break; 850 case 2: /* full stripe */ 851 pda_p->numSector = secPerSU; 852 pda_p->bufPtr = BUF_ALLOC(secPerSU); 853 break; 854 case 3: /* two slabs */ 855 pda_p->numSector = fone->numSector; 856 pda_p->raidAddress += fone_start; 857 pda_p->startSector += fone_start; 858 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 859 pda_p++; 860 pda_p->type = RF_PDA_TYPE_DATA; 861 pda_p->raidAddress = sosAddr + (i * secPerSU); 862 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 863 pda_p->numSector = ftwo->numSector; 864 pda_p->raidAddress += ftwo_start; 865 pda_p->startSector += ftwo_start; 866 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 867 break; 868 default: 869 RF_PANIC(); 870 } 871 pda_p++; 872 } 873 874 /* march through the touched stripe units */ 875 for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { 876 if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) 877 continue; 878 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); 879 suend = suoff + phys_p->numSector; 880 switch (state) { 881 case 1: /* single buffer */ 882 if (suoff > fone_start) { 883 RF_ASSERT(suend >= fone_end); 884 /* The data read starts after the mapped 885 * access, snip off the begining */ 886 pda_p->numSector = suoff - fone_start; 887 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 888 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 889 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 890 pda_p++; 891 } 892 if (suend < fone_end) { 893 RF_ASSERT(suoff <= 
fone_start); 894 /* The data read stops before the end of the 895 * failed access, extend */ 896 pda_p->numSector = fone_end - suend; 897 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ 898 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 899 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 900 pda_p++; 901 } 902 break; 903 case 2: /* whole stripe unit */ 904 RF_ASSERT((suoff == 0) || (suend == secPerSU)); 905 if (suend < secPerSU) { /* short read, snip from end 906 * on */ 907 pda_p->numSector = secPerSU - suend; 908 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ 909 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 910 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 911 pda_p++; 912 } else 913 if (suoff > 0) { /* short at front */ 914 pda_p->numSector = suoff; 915 pda_p->raidAddress = sosAddr + (i * secPerSU); 916 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 917 pda_p->bufPtr = 918 BUF_ALLOC(pda_p->numSector); 919 pda_p++; 920 } 921 break; 922 case 3: /* two nonoverlapping failures */ 923 if ((suoff > fone_start) || (suend < fone_end)) { 924 if (suoff > fone_start) { 925 RF_ASSERT(suend >= fone_end); 926 /* The data read starts after the 927 * mapped access, snip off the 928 * begining */ 929 pda_p->numSector = suoff - fone_start; 930 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 931 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 932 pda_p->bufPtr = 933 BUF_ALLOC(pda_p->numSector); 934 pda_p++; 935 } 936 if (suend < fone_end) { 937 RF_ASSERT(suoff <= fone_start); 938 /* The data read stops before the end 939 * of the failed access, extend */ 940 pda_p->numSector = fone_end - suend; 941 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 942 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 943 pda_p->bufPtr = 944 BUF_ALLOC(pda_p->numSector); 945 pda_p++; 946 } 947 } 948 if ((suoff > ftwo_start) || (suend < ftwo_end)) { 949 if (suoff > ftwo_start) { 950 RF_ASSERT(suend >= ftwo_end); 951 /* The data read starts after the 952 * mapped access, snip off the 953 * begining */ 954 pda_p->numSector = suoff - ftwo_start; 955 pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; 956 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 957 pda_p->bufPtr = 958 BUF_ALLOC(pda_p->numSector); 959 pda_p++; 960 } 961 if (suend < ftwo_end) { 962 RF_ASSERT(suoff <= ftwo_start); 963 /* The data read stops before the end 964 * of the failed access, extend */ 965 pda_p->numSector = ftwo_end - suend; 966 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ 967 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 968 pda_p->bufPtr = 969 BUF_ALLOC(pda_p->numSector); 970 pda_p++; 971 } 972 } 973 break; 974 default: 975 RF_PANIC(); 976 } 977 } 978 979 /* after the last accessed disk */ 980 for (; i < numDataCol; i++) { 981 if ((pda_p - (*pdap)) == napdas) 982 continue; 983 pda_p->type = RF_PDA_TYPE_DATA; 984 pda_p->raidAddress = sosAddr + (i * secPerSU); 985 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 986 /* skip over dead disks */ 987 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 988 continue; 989 switch (state) { 990 case 1: /* fone */ 991 pda_p->numSector = fone->numSector; 992 pda_p->raidAddress += fone_start; 993 pda_p->startSector += fone_start; 994 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 995 break; 996 case 2: /* full stripe */ 997 pda_p->numSector = secPerSU; 998 pda_p->bufPtr = BUF_ALLOC(secPerSU); 999 break; 1000 case 3: /* two slabs */ 1001 pda_p->numSector 
= fone->numSector; 1002 pda_p->raidAddress += fone_start; 1003 pda_p->startSector += fone_start; 1004 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 1005 pda_p++; 1006 pda_p->type = RF_PDA_TYPE_DATA; 1007 pda_p->raidAddress = sosAddr + (i * secPerSU); 1008 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 1009 pda_p->numSector = ftwo->numSector; 1010 pda_p->raidAddress += ftwo_start; 1011 pda_p->startSector += ftwo_start; 1012 pda_p->bufPtr = BUF_ALLOC(pda_p->numSector); 1013 break; 1014 default: 1015 RF_PANIC(); 1016 } 1017 pda_p++; 1018 } 1019 1020 RF_ASSERT(pda_p - *pdap == napdas); 1021 return; 1022} 1023#define INIT_DISK_NODE(node,name) \ 1024rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ 1025(node)->succedents[0] = unblockNode; \ 1026(node)->succedents[1] = recoveryNode; \ 1027(node)->antecedents[0] = blockNode; \ 1028(node)->antType[0] = rf_control 1029 1030#define DISK_NODE_PARAMS(_node_,_p_) \ 1031 (_node_).params[0].p = _p_ ; \ 1032 (_node_).params[1].p = (_p_)->bufPtr; \ 1033 (_node_).params[2].v = parityStripeID; \ 1034 (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru) 1035 1036void 1037rf_DoubleDegRead(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 1038 RF_DagHeader_t *dag_h, void *bp, 1039 RF_RaidAccessFlags_t flags, 1040 RF_AllocListElem_t *allocList, 1041 const char *redundantReadNodeName, 1042 const char *recoveryNodeName, 1043 void (*recovFunc) (RF_DagNode_t *)) 1044{ 1045 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 1046 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, 1047 *unblockNode, *rpNodes, *rqNodes, *termNode; 1048 RF_PhysDiskAddr_t *pda, *pqPDAs; 1049 RF_PhysDiskAddr_t *npdas; 1050 int nNodes, nRrdNodes, nRudNodes, i; 1051 RF_ReconUnitNum_t which_ru; 1052 int nReadNodes, nPQNodes; 1053 RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; 1054 
RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; 1055 RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); 1056 1057#if RF_DEBUG_DAG 1058 if (rf_dagDebug) 1059 printf("[Creating Double Degraded Read DAG]\n"); 1060#endif 1061 rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); 1062 1063 nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); 1064 nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; 1065 nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; 1066 1067 nodes = RF_MallocAndAdd(nNodes * sizeof(*nodes), allocList); 1068 i = 0; 1069 blockNode = &nodes[i]; 1070 i += 1; 1071 unblockNode = &nodes[i]; 1072 i += 1; 1073 recoveryNode = &nodes[i]; 1074 i += 1; 1075 termNode = &nodes[i]; 1076 i += 1; 1077 rudNodes = &nodes[i]; 1078 i += nRudNodes; 1079 rrdNodes = &nodes[i]; 1080 i += nRrdNodes; 1081 rpNodes = &nodes[i]; 1082 i += nPQNodes; 1083 rqNodes = &nodes[i]; 1084 i += nPQNodes; 1085 RF_ASSERT(i == nNodes); 1086 1087 dag_h->numSuccedents = 1; 1088 dag_h->succedents[0] = blockNode; 1089 dag_h->creator = "DoubleDegRead"; 1090 dag_h->numCommits = 0; 1091 dag_h->numCommitNodes = 1; /* unblock */ 1092 1093 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); 1094 termNode->antecedents[0] = unblockNode; 1095 termNode->antType[0] = rf_control; 1096 termNode->antecedents[1] = recoveryNode; 1097 termNode->antType[1] = rf_control; 1098 1099 /* init the block and unblock nodes */ 1100 /* The block node has all nodes except itself, unblock and recovery as 1101 * successors. Similarly for predecessors of the unblock. 
*/ 1102 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); 1103 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); 1104 1105 for (i = 0; i < nReadNodes; i++) { 1106 blockNode->succedents[i] = rudNodes + i; 1107 unblockNode->antecedents[i] = rudNodes + i; 1108 unblockNode->antType[i] = rf_control; 1109 } 1110 unblockNode->succedents[0] = termNode; 1111 1112 /* The recovery node has all the reads as predecessors, and the term 1113 * node as successors. It gets a pda as a param from each of the read 1114 * nodes plus the raidPtr. For each failed unit is has a result pda. */ 1115 rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, 1116 1, /* succesors */ 1117 nReadNodes, /* preds */ 1118 nReadNodes + 2, /* params */ 1119 asmap->numDataFailed, /* results */ 1120 dag_h, recoveryNodeName, allocList); 1121 1122 recoveryNode->succedents[0] = termNode; 1123 for (i = 0; i < nReadNodes; i++) { 1124 recoveryNode->antecedents[i] = rudNodes + i; 1125 recoveryNode->antType[i] = rf_trueData; 1126 } 1127 1128 /* build the read nodes, then come back and fill in recovery params 1129 * and results */ 1130 pda = asmap->physInfo; 1131 for (i = 0; i < nRudNodes; pda = pda->next) { 1132 if ((pda == failedPDA) || (pda == failedPDAtwo)) 1133 continue; 1134 INIT_DISK_NODE(rudNodes + i, "Rud"); 1135 RF_ASSERT(pda); 1136 DISK_NODE_PARAMS(rudNodes[i], pda); 1137 i++; 1138 } 1139 1140 pda = npdas; 1141 for (i = 0; i < nRrdNodes; i++, pda = pda->next) { 1142 INIT_DISK_NODE(rrdNodes + i, "Rrd"); 1143 RF_ASSERT(pda); 1144 DISK_NODE_PARAMS(rrdNodes[i], pda); 1145 } 1146 1147 /* redundancy pdas */ 1148 pda = pqPDAs; 1149 INIT_DISK_NODE(rpNodes, "Rp"); 1150 RF_ASSERT(pda); 1151 DISK_NODE_PARAMS(rpNodes[0], pda); 1152 pda++; 1153 INIT_DISK_NODE(rqNodes, redundantReadNodeName); 1154 RF_ASSERT(pda); 1155 
DISK_NODE_PARAMS(rqNodes[0], pda); 1156 if (nPQNodes == 2) { 1157 pda++; 1158 INIT_DISK_NODE(rpNodes + 1, "Rp"); 1159 RF_ASSERT(pda); 1160 DISK_NODE_PARAMS(rpNodes[1], pda); 1161 pda++; 1162 INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName); 1163 RF_ASSERT(pda); 1164 DISK_NODE_PARAMS(rqNodes[1], pda); 1165 } 1166 /* fill in recovery node params */ 1167 for (i = 0; i < nReadNodes; i++) 1168 recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ 1169 recoveryNode->params[i++].p = (void *) raidPtr; 1170 recoveryNode->params[i++].p = (void *) asmap; 1171 recoveryNode->results[0] = failedPDA; 1172 if (asmap->numDataFailed == 2) 1173 recoveryNode->results[1] = failedPDAtwo; 1174 1175 /* zero fill the target data buffers? */ 1176} 1177 1178#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ 1179