rf_dagdegrd.c revision 1.19
1/* $NetBSD: rf_dagdegrd.c,v 1.19 2004/03/05 03:22:05 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * rf_dagdegrd.c 31 * 32 * code for creating degraded read DAGs 33 */ 34 35#include <sys/cdefs.h> 36__KERNEL_RCSID(0, "$NetBSD: rf_dagdegrd.c,v 1.19 2004/03/05 03:22:05 oster Exp $"); 37 38#include <dev/raidframe/raidframevar.h> 39 40#include "rf_archs.h" 41#include "rf_raid.h" 42#include "rf_dag.h" 43#include "rf_dagutils.h" 44#include "rf_dagfuncs.h" 45#include "rf_debugMem.h" 46#include "rf_general.h" 47#include "rf_dagdegrd.h" 48 49 50/****************************************************************************** 51 * 52 * General comments on DAG creation: 53 * 54 * All DAGs in this file use roll-away error recovery. Each DAG has a single 55 * commit node, usually called "Cmt." 
If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
 * does not make changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll-forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */


/******************************************************************************
 *
 * The following wrappers map the standard DAG creation interface to the
 * DAG creation routines.  Additionally, these wrappers enable experimentation
 * with new DAG structures by providing an extra level of indirection, allowing
 * the DAG creation routines to be replaced at this single point.
75 */ 76 77void 78rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr, 79 RF_AccessStripeMap_t *asmap, 80 RF_DagHeader_t *dag_h, 81 void *bp, 82 RF_RaidAccessFlags_t flags, 83 RF_AllocListElem_t *allocList) 84{ 85 rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 86 &rf_xorRecoveryFuncs); 87} 88 89 90/****************************************************************************** 91 * 92 * DAG creation code begins here 93 */ 94 95 96/****************************************************************************** 97 * Create a degraded read DAG for RAID level 1 98 * 99 * Hdr -> Nil -> R(p/s)d -> Commit -> Trm 100 * 101 * The "Rd" node reads data from the surviving disk in the mirror pair 102 * Rpd - read of primary copy 103 * Rsd - read of secondary copy 104 * 105 * Parameters: raidPtr - description of the physical array 106 * asmap - logical & physical addresses for this access 107 * bp - buffer ptr (for holding write data) 108 * flags - general flags (e.g. disk locking) 109 * allocList - list of memory allocated in DAG creation 110 *****************************************************************************/ 111 112void 113rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr, 114 RF_AccessStripeMap_t *asmap, 115 RF_DagHeader_t *dag_h, 116 void *bp, 117 RF_RaidAccessFlags_t flags, 118 RF_AllocListElem_t *allocList) 119{ 120 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; 121 RF_StripeNum_t parityStripeID; 122 RF_ReconUnitNum_t which_ru; 123 RF_PhysDiskAddr_t *pda; 124 int useMirror, i; 125 126 useMirror = 0; 127 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 128 asmap->raidAddress, &which_ru); 129#if RF_DEBUG_DAG 130 if (rf_dagDebug) { 131 printf("[Creating RAID level 1 degraded read DAG]\n"); 132 } 133#endif 134 dag_h->creator = "RaidOneDegradedReadDAG"; 135 /* alloc the Wnd nodes and the Wmir node */ 136 if (asmap->numDataFailed == 0) 137 useMirror = RF_FALSE; 138 else 139 useMirror = RF_TRUE; 140 141 /* 
total number of nodes = 1 + (block + commit + terminator) */ 142 RF_MallocAndAdd(nodes, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 143 i = 0; 144 rdNode = &nodes[i]; 145 i++; 146 blockNode = &nodes[i]; 147 i++; 148 commitNode = &nodes[i]; 149 i++; 150 termNode = &nodes[i]; 151 i++; 152 153 /* this dag can not commit until the commit node is reached. errors 154 * prior to the commit point imply the dag has failed and must be 155 * retried */ 156 dag_h->numCommitNodes = 1; 157 dag_h->numCommits = 0; 158 dag_h->numSuccedents = 1; 159 160 /* initialize the block, commit, and terminator nodes */ 161 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 162 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 163 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 164 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 165 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 166 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 167 168 pda = asmap->physInfo; 169 RF_ASSERT(pda != NULL); 170 /* parityInfo must describe entire parity unit */ 171 RF_ASSERT(asmap->parityInfo->next == NULL); 172 173 /* initialize the data node */ 174 if (!useMirror) { 175 /* read primary copy of data */ 176 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 177 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 178 rdNode->params[0].p = pda; 179 rdNode->params[1].p = pda->bufPtr; 180 rdNode->params[2].v = parityStripeID; 181 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 182 which_ru); 183 } else { 184 /* read secondary copy of data */ 185 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 186 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); 187 rdNode->params[0].p = asmap->parityInfo; 188 rdNode->params[1].p = pda->bufPtr; 189 rdNode->params[2].v = parityStripeID; 190 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 191 
which_ru); 192 } 193 194 /* connect header to block node */ 195 RF_ASSERT(dag_h->numSuccedents == 1); 196 RF_ASSERT(blockNode->numAntecedents == 0); 197 dag_h->succedents[0] = blockNode; 198 199 /* connect block node to rdnode */ 200 RF_ASSERT(blockNode->numSuccedents == 1); 201 RF_ASSERT(rdNode->numAntecedents == 1); 202 blockNode->succedents[0] = rdNode; 203 rdNode->antecedents[0] = blockNode; 204 rdNode->antType[0] = rf_control; 205 206 /* connect rdnode to commit node */ 207 RF_ASSERT(rdNode->numSuccedents == 1); 208 RF_ASSERT(commitNode->numAntecedents == 1); 209 rdNode->succedents[0] = commitNode; 210 commitNode->antecedents[0] = rdNode; 211 commitNode->antType[0] = rf_control; 212 213 /* connect commit node to terminator */ 214 RF_ASSERT(commitNode->numSuccedents == 1); 215 RF_ASSERT(termNode->numAntecedents == 1); 216 RF_ASSERT(termNode->numSuccedents == 0); 217 commitNode->succedents[0] = termNode; 218 termNode->antecedents[0] = commitNode; 219 termNode->antType[0] = rf_control; 220} 221 222 223 224/****************************************************************************** 225 * 226 * creates a DAG to perform a degraded-mode read of data within one stripe. 227 * This DAG is as follows: 228 * 229 * Hdr -> Block -> Rud -> Xor -> Cmt -> T 230 * -> Rrd -> 231 * -> Rp --> 232 * 233 * Each R node is a successor of the L node 234 * One successor arc from each R node goes to C, and the other to X 235 * There is one Rud for each chunk of surviving user data requested by the 236 * user, and one Rrd for each chunk of surviving user data _not_ being read by 237 * the user 238 * R = read, ud = user data, rd = recovery (surviving) data, p = parity 239 * X = XOR, C = Commit, T = terminate 240 * 241 * The block node guarantees a single source node. 242 * 243 * Note: The target buffer for the XOR node is set to the actual user buffer 244 * where the failed data is supposed to end up. This buffer is zero'd by the 245 * code here. 
Thus, if you create a degraded read dag, use it, and then 246 * re-use, you have to be sure to zero the target buffer prior to the re-use. 247 * 248 * The recfunc argument at the end specifies the name and function used for 249 * the redundancy 250 * recovery function. 251 * 252 *****************************************************************************/ 253 254void 255rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 256 RF_DagHeader_t *dag_h, void *bp, 257 RF_RaidAccessFlags_t flags, 258 RF_AllocListElem_t *allocList, 259 const RF_RedFuncs_t *recFunc) 260{ 261 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; 262 RF_DagNode_t *commitNode, *rpNode, *termNode; 263 int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; 264 int j, paramNum; 265 RF_SectorCount_t sectorsPerSU; 266 RF_ReconUnitNum_t which_ru; 267 char *overlappingPDAs;/* a temporary array of flags */ 268 RF_AccessStripeMapHeader_t *new_asm_h[2]; 269 RF_PhysDiskAddr_t *pda, *parityPDA; 270 RF_StripeNum_t parityStripeID; 271 RF_PhysDiskAddr_t *failedPDA; 272 RF_RaidLayout_t *layoutPtr; 273 char *rpBuf; 274 275 layoutPtr = &(raidPtr->Layout); 276 /* failedPDA points to the pda within the asm that targets the failed 277 * disk */ 278 failedPDA = asmap->failedPDAs[0]; 279 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 280 asmap->raidAddress, &which_ru); 281 sectorsPerSU = layoutPtr->sectorsPerStripeUnit; 282 283#if RF_DEBUG_DAG 284 if (rf_dagDebug) { 285 printf("[Creating degraded read DAG]\n"); 286 } 287#endif 288 RF_ASSERT(asmap->numDataFailed == 1); 289 dag_h->creator = "DegradedReadDAG"; 290 291 /* 292 * generate two ASMs identifying the surviving data we need 293 * in order to recover the lost data 294 */ 295 296 /* overlappingPDAs array must be zero'd */ 297 RF_Malloc(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char), (char *)); 298 rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, 299 &rpBuf, overlappingPDAs, 
allocList); 300 301 /* 302 * create all the nodes at once 303 * 304 * -1 because no access is generated for the failed pda 305 */ 306 nRudNodes = asmap->numStripeUnitsAccessed - 1; 307 nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + 308 ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); 309 nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, 310 * Rrd */ 311 RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), 312 allocList); 313 i = 0; 314 blockNode = &nodes[i]; 315 i++; 316 commitNode = &nodes[i]; 317 i++; 318 xorNode = &nodes[i]; 319 i++; 320 rpNode = &nodes[i]; 321 i++; 322 termNode = &nodes[i]; 323 i++; 324 rudNodes = &nodes[i]; 325 i += nRudNodes; 326 rrdNodes = &nodes[i]; 327 i += nRrdNodes; 328 RF_ASSERT(i == nNodes); 329 330 /* initialize nodes */ 331 dag_h->numCommitNodes = 1; 332 dag_h->numCommits = 0; 333 /* this dag can not commit until the commit node is reached errors 334 * prior to the commit point imply the dag has failed */ 335 dag_h->numSuccedents = 1; 336 337 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 338 NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); 339 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 340 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 341 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 342 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 343 rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, 344 NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, 345 recFunc->SimpleName, allocList); 346 347 /* fill in the Rud nodes */ 348 for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { 349 if (pda == failedPDA) { 350 i--; 351 continue; 352 } 353 rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, 354 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 355 
"Rud", allocList); 356 RF_ASSERT(pda); 357 rudNodes[i].params[0].p = pda; 358 rudNodes[i].params[1].p = pda->bufPtr; 359 rudNodes[i].params[2].v = parityStripeID; 360 rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 361 } 362 363 /* fill in the Rrd nodes */ 364 i = 0; 365 if (new_asm_h[0]) { 366 for (pda = new_asm_h[0]->stripeMap->physInfo; 367 i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; 368 i++, pda = pda->next) { 369 rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, 370 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 371 dag_h, "Rrd", allocList); 372 RF_ASSERT(pda); 373 rrdNodes[i].params[0].p = pda; 374 rrdNodes[i].params[1].p = pda->bufPtr; 375 rrdNodes[i].params[2].v = parityStripeID; 376 rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 377 } 378 } 379 if (new_asm_h[1]) { 380 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; 381 j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; 382 j++, pda = pda->next) { 383 rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, 384 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 385 dag_h, "Rrd", allocList); 386 RF_ASSERT(pda); 387 rrdNodes[i + j].params[0].p = pda; 388 rrdNodes[i + j].params[1].p = pda->bufPtr; 389 rrdNodes[i + j].params[2].v = parityStripeID; 390 rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 391 } 392 } 393 /* make a PDA for the parity unit */ 394 RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 395 parityPDA->col = asmap->parityInfo->col; 396 parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) 397 * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); 398 parityPDA->numSector = failedPDA->numSector; 399 400 /* initialize the Rp node */ 401 rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 402 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); 403 
rpNode->params[0].p = parityPDA; 404 rpNode->params[1].p = rpBuf; 405 rpNode->params[2].v = parityStripeID; 406 rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 407 408 /* 409 * the last and nastiest step is to assign all 410 * the parameters of the Xor node 411 */ 412 paramNum = 0; 413 for (i = 0; i < nRrdNodes; i++) { 414 /* all the Rrd nodes need to be xored together */ 415 xorNode->params[paramNum++] = rrdNodes[i].params[0]; 416 xorNode->params[paramNum++] = rrdNodes[i].params[1]; 417 } 418 for (i = 0; i < nRudNodes; i++) { 419 /* any Rud nodes that overlap the failed access need to be 420 * xored in */ 421 if (overlappingPDAs[i]) { 422 RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 423 memcpy((char *) pda, (char *) rudNodes[i].params[0].p, sizeof(RF_PhysDiskAddr_t)); 424 rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); 425 xorNode->params[paramNum++].p = pda; 426 xorNode->params[paramNum++].p = pda->bufPtr; 427 } 428 } 429 RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); 430 431 /* install parity pda as last set of params to be xor'd */ 432 xorNode->params[paramNum++].p = parityPDA; 433 xorNode->params[paramNum++].p = rpBuf; 434 435 /* 436 * the last 2 params to the recovery xor node are 437 * the failed PDA and the raidPtr 438 */ 439 xorNode->params[paramNum++].p = failedPDA; 440 xorNode->params[paramNum++].p = raidPtr; 441 RF_ASSERT(paramNum == 2 * nXorBufs + 2); 442 443 /* 444 * The xor node uses results[0] as the target buffer. 445 * Set pointer and zero the buffer. In the kernel, this 446 * may be a user buffer in which case we have to remap it. 
447 */ 448 xorNode->results[0] = failedPDA->bufPtr; 449 memset(failedPDA->bufPtr, 0, rf_RaidAddressToByte(raidPtr, 450 failedPDA->numSector)); 451 452 /* connect nodes to form graph */ 453 /* connect the header to the block node */ 454 RF_ASSERT(dag_h->numSuccedents == 1); 455 RF_ASSERT(blockNode->numAntecedents == 0); 456 dag_h->succedents[0] = blockNode; 457 458 /* connect the block node to the read nodes */ 459 RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); 460 RF_ASSERT(rpNode->numAntecedents == 1); 461 blockNode->succedents[0] = rpNode; 462 rpNode->antecedents[0] = blockNode; 463 rpNode->antType[0] = rf_control; 464 for (i = 0; i < nRrdNodes; i++) { 465 RF_ASSERT(rrdNodes[i].numSuccedents == 1); 466 blockNode->succedents[1 + i] = &rrdNodes[i]; 467 rrdNodes[i].antecedents[0] = blockNode; 468 rrdNodes[i].antType[0] = rf_control; 469 } 470 for (i = 0; i < nRudNodes; i++) { 471 RF_ASSERT(rudNodes[i].numSuccedents == 1); 472 blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; 473 rudNodes[i].antecedents[0] = blockNode; 474 rudNodes[i].antType[0] = rf_control; 475 } 476 477 /* connect the read nodes to the xor node */ 478 RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); 479 RF_ASSERT(rpNode->numSuccedents == 1); 480 rpNode->succedents[0] = xorNode; 481 xorNode->antecedents[0] = rpNode; 482 xorNode->antType[0] = rf_trueData; 483 for (i = 0; i < nRrdNodes; i++) { 484 RF_ASSERT(rrdNodes[i].numSuccedents == 1); 485 rrdNodes[i].succedents[0] = xorNode; 486 xorNode->antecedents[1 + i] = &rrdNodes[i]; 487 xorNode->antType[1 + i] = rf_trueData; 488 } 489 for (i = 0; i < nRudNodes; i++) { 490 RF_ASSERT(rudNodes[i].numSuccedents == 1); 491 rudNodes[i].succedents[0] = xorNode; 492 xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; 493 xorNode->antType[1 + nRrdNodes + i] = rf_trueData; 494 } 495 496 /* connect the xor node to the commit node */ 497 RF_ASSERT(xorNode->numSuccedents == 1); 498 RF_ASSERT(commitNode->numAntecedents 
== 1); 499 xorNode->succedents[0] = commitNode; 500 commitNode->antecedents[0] = xorNode; 501 commitNode->antType[0] = rf_control; 502 503 /* connect the termNode to the commit node */ 504 RF_ASSERT(commitNode->numSuccedents == 1); 505 RF_ASSERT(termNode->numAntecedents == 1); 506 RF_ASSERT(termNode->numSuccedents == 0); 507 commitNode->succedents[0] = termNode; 508 termNode->antType[0] = rf_control; 509 termNode->antecedents[0] = commitNode; 510} 511 512#if (RF_INCLUDE_CHAINDECLUSTER > 0) 513/****************************************************************************** 514 * Create a degraded read DAG for Chained Declustering 515 * 516 * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm 517 * 518 * The "Rd" node reads data from the surviving disk in the mirror pair 519 * Rpd - read of primary copy 520 * Rsd - read of secondary copy 521 * 522 * Parameters: raidPtr - description of the physical array 523 * asmap - logical & physical addresses for this access 524 * bp - buffer ptr (for holding write data) 525 * flags - general flags (e.g. 
disk locking) 526 * allocList - list of memory allocated in DAG creation 527 *****************************************************************************/ 528 529void 530rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 531 RF_DagHeader_t *dag_h, void *bp, 532 RF_RaidAccessFlags_t flags, 533 RF_AllocListElem_t *allocList) 534{ 535 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; 536 RF_StripeNum_t parityStripeID; 537 int useMirror, i, shiftable; 538 RF_ReconUnitNum_t which_ru; 539 RF_PhysDiskAddr_t *pda; 540 541 if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { 542 shiftable = RF_TRUE; 543 } else { 544 shiftable = RF_FALSE; 545 } 546 useMirror = 0; 547 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 548 asmap->raidAddress, &which_ru); 549 550#if RF_DEBUG_DAG 551 if (rf_dagDebug) { 552 printf("[Creating RAID C degraded read DAG]\n"); 553 } 554#endif 555 dag_h->creator = "RaidCDegradedReadDAG"; 556 /* alloc the Wnd nodes and the Wmir node */ 557 if (asmap->numDataFailed == 0) 558 useMirror = RF_FALSE; 559 else 560 useMirror = RF_TRUE; 561 562 /* total number of nodes = 1 + (block + commit + terminator) */ 563 RF_MallocAndAdd(nodes, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 564 i = 0; 565 rdNode = &nodes[i]; 566 i++; 567 blockNode = &nodes[i]; 568 i++; 569 commitNode = &nodes[i]; 570 i++; 571 termNode = &nodes[i]; 572 i++; 573 574 /* 575 * This dag can not commit until the commit node is reached. 576 * Errors prior to the commit point imply the dag has failed 577 * and must be retried. 
578 */ 579 dag_h->numCommitNodes = 1; 580 dag_h->numCommits = 0; 581 dag_h->numSuccedents = 1; 582 583 /* initialize the block, commit, and terminator nodes */ 584 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 585 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 586 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 587 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 588 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 589 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 590 591 pda = asmap->physInfo; 592 RF_ASSERT(pda != NULL); 593 /* parityInfo must describe entire parity unit */ 594 RF_ASSERT(asmap->parityInfo->next == NULL); 595 596 /* initialize the data node */ 597 if (!useMirror) { 598 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 599 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 600 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { 601 /* shift this read to the next disk in line */ 602 rdNode->params[0].p = asmap->parityInfo; 603 rdNode->params[1].p = pda->bufPtr; 604 rdNode->params[2].v = parityStripeID; 605 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 606 } else { 607 /* read primary copy */ 608 rdNode->params[0].p = pda; 609 rdNode->params[1].p = pda->bufPtr; 610 rdNode->params[2].v = parityStripeID; 611 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 612 } 613 } else { 614 /* read secondary copy of data */ 615 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 616 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); 617 rdNode->params[0].p = asmap->parityInfo; 618 rdNode->params[1].p = pda->bufPtr; 619 rdNode->params[2].v = parityStripeID; 620 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 621 } 622 623 /* connect header to block node */ 624 RF_ASSERT(dag_h->numSuccedents == 1); 625 
RF_ASSERT(blockNode->numAntecedents == 0); 626 dag_h->succedents[0] = blockNode; 627 628 /* connect block node to rdnode */ 629 RF_ASSERT(blockNode->numSuccedents == 1); 630 RF_ASSERT(rdNode->numAntecedents == 1); 631 blockNode->succedents[0] = rdNode; 632 rdNode->antecedents[0] = blockNode; 633 rdNode->antType[0] = rf_control; 634 635 /* connect rdnode to commit node */ 636 RF_ASSERT(rdNode->numSuccedents == 1); 637 RF_ASSERT(commitNode->numAntecedents == 1); 638 rdNode->succedents[0] = commitNode; 639 commitNode->antecedents[0] = rdNode; 640 commitNode->antType[0] = rf_control; 641 642 /* connect commit node to terminator */ 643 RF_ASSERT(commitNode->numSuccedents == 1); 644 RF_ASSERT(termNode->numAntecedents == 1); 645 RF_ASSERT(termNode->numSuccedents == 0); 646 commitNode->succedents[0] = termNode; 647 termNode->antecedents[0] = commitNode; 648 termNode->antType[0] = rf_control; 649} 650#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ 651 652#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) 653/* 654 * XXX move this elsewhere? 655 */ 656void 657rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 658 RF_PhysDiskAddr_t **pdap, int *nNodep, 659 RF_PhysDiskAddr_t **pqpdap, int *nPQNodep, 660 RF_AllocListElem_t *allocList) 661{ 662 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 663 int PDAPerDisk, i; 664 RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; 665 int numDataCol = layoutPtr->numDataCol; 666 int state; 667 RF_SectorNum_t suoff, suend; 668 unsigned firstDataCol, napdas, count; 669 RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; 670 RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; 671 RF_PhysDiskAddr_t *pda_p; 672 RF_PhysDiskAddr_t *phys_p; 673 RF_RaidAddr_t sosAddr; 674 675 /* determine how many pda's we will have to generate per unaccess 676 * stripe. 
If there is only one failed data unit, it is one; if two, 677 * possibly two, depending wether they overlap. */ 678 679 fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); 680 fone_end = fone_start + fone->numSector; 681 682#define CONS_PDA(if,start,num) \ 683 pda_p->col = asmap->if->col; \ 684 pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ 685 pda_p->numSector = num; \ 686 pda_p->next = NULL; \ 687 RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) 688 689 if (asmap->numDataFailed == 1) { 690 PDAPerDisk = 1; 691 state = 1; 692 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 693 pda_p = *pqpdap; 694 /* build p */ 695 CONS_PDA(parityInfo, fone_start, fone->numSector); 696 pda_p->type = RF_PDA_TYPE_PARITY; 697 pda_p++; 698 /* build q */ 699 CONS_PDA(qInfo, fone_start, fone->numSector); 700 pda_p->type = RF_PDA_TYPE_Q; 701 } else { 702 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); 703 ftwo_end = ftwo_start + ftwo->numSector; 704 if (fone->numSector + ftwo->numSector > secPerSU) { 705 PDAPerDisk = 1; 706 state = 2; 707 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 708 pda_p = *pqpdap; 709 CONS_PDA(parityInfo, 0, secPerSU); 710 pda_p->type = RF_PDA_TYPE_PARITY; 711 pda_p++; 712 CONS_PDA(qInfo, 0, secPerSU); 713 pda_p->type = RF_PDA_TYPE_Q; 714 } else { 715 PDAPerDisk = 2; 716 state = 3; 717 /* four of them, fone, then ftwo */ 718 RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 719 pda_p = *pqpdap; 720 CONS_PDA(parityInfo, fone_start, fone->numSector); 721 pda_p->type = RF_PDA_TYPE_PARITY; 722 pda_p++; 723 CONS_PDA(qInfo, fone_start, fone->numSector); 724 pda_p->type = RF_PDA_TYPE_Q; 725 pda_p++; 726 CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); 727 pda_p->type = RF_PDA_TYPE_PARITY; 728 pda_p++; 729 CONS_PDA(qInfo, ftwo_start, 
ftwo->numSector); 730 pda_p->type = RF_PDA_TYPE_Q; 731 } 732 } 733 /* figure out number of nonaccessed pda */ 734 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); 735 *nPQNodep = PDAPerDisk; 736 737 /* sweep over the over accessed pda's, figuring out the number of 738 * additional pda's to generate. Of course, skip the failed ones */ 739 740 count = 0; 741 for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { 742 if ((pda_p == fone) || (pda_p == ftwo)) 743 continue; 744 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); 745 suend = suoff + pda_p->numSector; 746 switch (state) { 747 case 1: /* one failed PDA to overlap */ 748 /* if a PDA doesn't contain the failed unit, it can 749 * only miss the start or end, not both */ 750 if ((suoff > fone_start) || (suend < fone_end)) 751 count++; 752 break; 753 case 2: /* whole stripe */ 754 if (suoff) /* leak at begining */ 755 count++; 756 if (suend < numDataCol) /* leak at end */ 757 count++; 758 break; 759 case 3: /* two disjoint units */ 760 if ((suoff > fone_start) || (suend < fone_end)) 761 count++; 762 if ((suoff > ftwo_start) || (suend < ftwo_end)) 763 count++; 764 break; 765 default: 766 RF_PANIC(); 767 } 768 } 769 770 napdas += count; 771 *nNodep = napdas; 772 if (napdas == 0) 773 return; /* short circuit */ 774 775 /* allocate up our list of pda's */ 776 777 RF_MallocAndAdd(pda_p, napdas * sizeof(RF_PhysDiskAddr_t), 778 (RF_PhysDiskAddr_t *), allocList); 779 *pdap = pda_p; 780 781 /* linkem together */ 782 for (i = 0; i < (napdas - 1); i++) 783 pda_p[i].next = pda_p + (i + 1); 784 785 /* march through the one's up to the first accessed disk */ 786 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; 787 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 788 for (i = 0; i < firstDataCol; i++) { 789 if ((pda_p - (*pdap)) == napdas) 790 continue; 791 pda_p->type = 
RF_PDA_TYPE_DATA; 792 pda_p->raidAddress = sosAddr + (i * secPerSU); 793 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 794 /* skip over dead disks */ 795 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 796 continue; 797 switch (state) { 798 case 1: /* fone */ 799 pda_p->numSector = fone->numSector; 800 pda_p->raidAddress += fone_start; 801 pda_p->startSector += fone_start; 802 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 803 break; 804 case 2: /* full stripe */ 805 pda_p->numSector = secPerSU; 806 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); 807 break; 808 case 3: /* two slabs */ 809 pda_p->numSector = fone->numSector; 810 pda_p->raidAddress += fone_start; 811 pda_p->startSector += fone_start; 812 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 813 pda_p++; 814 pda_p->type = RF_PDA_TYPE_DATA; 815 pda_p->raidAddress = sosAddr + (i * secPerSU); 816 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 817 pda_p->numSector = ftwo->numSector; 818 pda_p->raidAddress += ftwo_start; 819 pda_p->startSector += ftwo_start; 820 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 821 break; 822 default: 823 RF_PANIC(); 824 } 825 pda_p++; 826 } 827 828 /* march through the touched stripe units */ 829 for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { 830 if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) 831 continue; 832 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); 833 suend = suoff + phys_p->numSector; 834 switch (state) { 835 case 1: /* single buffer */ 836 if (suoff > fone_start) { 837 RF_ASSERT(suend >= fone_end); 838 /* The data read starts after the mapped 839 * access, snip off the begining */ 
840 pda_p->numSector = suoff - fone_start; 841 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 842 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 843 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 844 pda_p++; 845 } 846 if (suend < fone_end) { 847 RF_ASSERT(suoff <= fone_start); 848 /* The data read stops before the end of the 849 * failed access, extend */ 850 pda_p->numSector = fone_end - suend; 851 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ 852 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 853 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 854 pda_p++; 855 } 856 break; 857 case 2: /* whole stripe unit */ 858 RF_ASSERT((suoff == 0) || (suend == secPerSU)); 859 if (suend < secPerSU) { /* short read, snip from end 860 * on */ 861 pda_p->numSector = secPerSU - suend; 862 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 863 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 864 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 865 pda_p++; 866 } else 867 if (suoff > 0) { /* short at front */ 868 pda_p->numSector = suoff; 869 pda_p->raidAddress = sosAddr + (i * secPerSU); 870 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 871 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 872 pda_p++; 873 } 874 break; 875 case 3: /* two nonoverlapping failures */ 876 if ((suoff > fone_start) || (suend < fone_end)) { 877 if (suoff > fone_start) { 878 RF_ASSERT(suend >= fone_end); 879 /* The data read starts after the 880 * mapped access, snip off the 881 * begining */ 882 pda_p->numSector = suoff - fone_start; 883 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 884 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 885 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 886 pda_p++; 887 } 888 if (suend < fone_end) { 889 RF_ASSERT(suoff <= fone_start); 890 /* The data read stops before the end 891 * of the failed access, extend */ 892 pda_p->numSector = fone_end - suend; 893 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 894 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 895 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 896 pda_p++; 897 } 898 } 899 if ((suoff > ftwo_start) || (suend < ftwo_end)) { 900 if (suoff > ftwo_start) { 901 RF_ASSERT(suend >= ftwo_end); 902 /* The data read starts after the 903 * mapped access, snip off the 904 * begining */ 905 pda_p->numSector = suoff - ftwo_start; 906 pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; 907 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 908 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 909 pda_p++; 910 } 911 if (suend < ftwo_end) { 912 RF_ASSERT(suoff <= ftwo_start); 913 /* The data read stops before the end 914 * of the failed access, extend */ 915 pda_p->numSector = ftwo_end - suend; 916 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 917 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 918 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 919 pda_p++; 920 } 921 } 922 break; 923 default: 924 RF_PANIC(); 925 } 926 } 927 928 /* after the last accessed disk */ 929 for (; i < numDataCol; i++) { 930 if ((pda_p - (*pdap)) == napdas) 931 continue; 932 pda_p->type = RF_PDA_TYPE_DATA; 933 pda_p->raidAddress = sosAddr + (i * secPerSU); 934 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 935 /* skip over dead disks */ 936 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 937 continue; 938 switch (state) { 939 case 1: /* fone */ 940 pda_p->numSector = fone->numSector; 941 pda_p->raidAddress += fone_start; 942 pda_p->startSector += fone_start; 943 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 944 break; 945 case 2: /* full stripe */ 946 pda_p->numSector = secPerSU; 947 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); 948 break; 949 case 3: /* two slabs */ 950 pda_p->numSector = fone->numSector; 951 pda_p->raidAddress += fone_start; 952 pda_p->startSector += fone_start; 953 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 954 pda_p++; 955 pda_p->type = RF_PDA_TYPE_DATA; 956 pda_p->raidAddress = sosAddr + (i * secPerSU); 957 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 958 pda_p->numSector = ftwo->numSector; 959 pda_p->raidAddress += ftwo_start; 960 pda_p->startSector += ftwo_start; 961 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 962 break; 963 default: 964 RF_PANIC(); 965 } 966 pda_p++; 967 } 968 969 RF_ASSERT(pda_p - *pdap == napdas); 970 return; 971} 972#define 
INIT_DISK_NODE(node,name) \ 973rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ 974(node)->succedents[0] = unblockNode; \ 975(node)->succedents[1] = recoveryNode; \ 976(node)->antecedents[0] = blockNode; \ 977(node)->antType[0] = rf_control 978 979#define DISK_NODE_PARAMS(_node_,_p_) \ 980 (_node_).params[0].p = _p_ ; \ 981 (_node_).params[1].p = (_p_)->bufPtr; \ 982 (_node_).params[2].v = parityStripeID; \ 983 (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru) 984 985void 986rf_DoubleDegRead(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 987 RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, 988 RF_AllocListElem_t *allocList, 989 char *redundantReadNodeName, char *recoveryNodeName, 990 int (*recovFunc) (RF_DagNode_t *)) 991{ 992 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 993 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, 994 *unblockNode, *rpNodes, *rqNodes, *termNode; 995 RF_PhysDiskAddr_t *pda, *pqPDAs; 996 RF_PhysDiskAddr_t *npdas; 997 int nNodes, nRrdNodes, nRudNodes, i; 998 RF_ReconUnitNum_t which_ru; 999 int nReadNodes, nPQNodes; 1000 RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; 1001 RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; 1002 RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); 1003 1004#if RF_DEBUG_DAG 1005 if (rf_dagDebug) 1006 printf("[Creating Double Degraded Read DAG]\n"); 1007#endif 1008 rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); 1009 1010 nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); 1011 nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; 1012 nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; 1013 1014 RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 1015 i = 0; 1016 blockNode = &nodes[i]; 1017 i += 1; 1018 
unblockNode = &nodes[i]; 1019 i += 1; 1020 recoveryNode = &nodes[i]; 1021 i += 1; 1022 termNode = &nodes[i]; 1023 i += 1; 1024 rudNodes = &nodes[i]; 1025 i += nRudNodes; 1026 rrdNodes = &nodes[i]; 1027 i += nRrdNodes; 1028 rpNodes = &nodes[i]; 1029 i += nPQNodes; 1030 rqNodes = &nodes[i]; 1031 i += nPQNodes; 1032 RF_ASSERT(i == nNodes); 1033 1034 dag_h->numSuccedents = 1; 1035 dag_h->succedents[0] = blockNode; 1036 dag_h->creator = "DoubleDegRead"; 1037 dag_h->numCommits = 0; 1038 dag_h->numCommitNodes = 1; /* unblock */ 1039 1040 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); 1041 termNode->antecedents[0] = unblockNode; 1042 termNode->antType[0] = rf_control; 1043 termNode->antecedents[1] = recoveryNode; 1044 termNode->antType[1] = rf_control; 1045 1046 /* init the block and unblock nodes */ 1047 /* The block node has all nodes except itself, unblock and recovery as 1048 * successors. Similarly for predecessors of the unblock. */ 1049 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); 1050 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); 1051 1052 for (i = 0; i < nReadNodes; i++) { 1053 blockNode->succedents[i] = rudNodes + i; 1054 unblockNode->antecedents[i] = rudNodes + i; 1055 unblockNode->antType[i] = rf_control; 1056 } 1057 unblockNode->succedents[0] = termNode; 1058 1059 /* The recovery node has all the reads as predecessors, and the term 1060 * node as successors. It gets a pda as a param from each of the read 1061 * nodes plus the raidPtr. For each failed unit is has a result pda. 
*/ 1062 rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, 1063 1, /* succesors */ 1064 nReadNodes, /* preds */ 1065 nReadNodes + 2, /* params */ 1066 asmap->numDataFailed, /* results */ 1067 dag_h, recoveryNodeName, allocList); 1068 1069 recoveryNode->succedents[0] = termNode; 1070 for (i = 0; i < nReadNodes; i++) { 1071 recoveryNode->antecedents[i] = rudNodes + i; 1072 recoveryNode->antType[i] = rf_trueData; 1073 } 1074 1075 /* build the read nodes, then come back and fill in recovery params 1076 * and results */ 1077 pda = asmap->physInfo; 1078 for (i = 0; i < nRudNodes; pda = pda->next) { 1079 if ((pda == failedPDA) || (pda == failedPDAtwo)) 1080 continue; 1081 INIT_DISK_NODE(rudNodes + i, "Rud"); 1082 RF_ASSERT(pda); 1083 DISK_NODE_PARAMS(rudNodes[i], pda); 1084 i++; 1085 } 1086 1087 pda = npdas; 1088 for (i = 0; i < nRrdNodes; i++, pda = pda->next) { 1089 INIT_DISK_NODE(rrdNodes + i, "Rrd"); 1090 RF_ASSERT(pda); 1091 DISK_NODE_PARAMS(rrdNodes[i], pda); 1092 } 1093 1094 /* redundancy pdas */ 1095 pda = pqPDAs; 1096 INIT_DISK_NODE(rpNodes, "Rp"); 1097 RF_ASSERT(pda); 1098 DISK_NODE_PARAMS(rpNodes[0], pda); 1099 pda++; 1100 INIT_DISK_NODE(rqNodes, redundantReadNodeName); 1101 RF_ASSERT(pda); 1102 DISK_NODE_PARAMS(rqNodes[0], pda); 1103 if (nPQNodes == 2) { 1104 pda++; 1105 INIT_DISK_NODE(rpNodes + 1, "Rp"); 1106 RF_ASSERT(pda); 1107 DISK_NODE_PARAMS(rpNodes[1], pda); 1108 pda++; 1109 INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName); 1110 RF_ASSERT(pda); 1111 DISK_NODE_PARAMS(rqNodes[1], pda); 1112 } 1113 /* fill in recovery node params */ 1114 for (i = 0; i < nReadNodes; i++) 1115 recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ 1116 recoveryNode->params[i++].p = (void *) raidPtr; 1117 recoveryNode->params[i++].p = (void *) asmap; 1118 recoveryNode->results[0] = failedPDA; 1119 if (asmap->numDataFailed == 2) 1120 recoveryNode->results[1] = failedPDAtwo; 1121 1122 /* zero fill the target data buffers? 
*/ 1123} 1124 1125#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ 1126