rf_dagdegrd.c revision 1.2
1/* $NetBSD: rf_dagdegrd.c,v 1.2 1999/01/26 02:33:52 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * rf_dagdegrd.c 31 * 32 * code for creating degraded read DAGs 33 */ 34 35#include "rf_types.h" 36#include "rf_raid.h" 37#include "rf_dag.h" 38#include "rf_dagutils.h" 39#include "rf_dagfuncs.h" 40#include "rf_threadid.h" 41#include "rf_debugMem.h" 42#include "rf_memchunk.h" 43#include "rf_general.h" 44#include "rf_dagdegrd.h" 45#include "rf_sys.h" 46 47 48/****************************************************************************** 49 * 50 * General comments on DAG creation: 51 * 52 * All DAGs in this file use roll-away error recovery. Each DAG has a single 53 * commit node, usually called "Cmt." If an error occurs before the Cmt node 54 * is reached, the execution engine will halt forward execution and work 55 * backward through the graph, executing the undo functions. 
Assuming that 56 * each node in the graph prior to the Cmt node are undoable and atomic - or - 57 * does not make changes to permanent state, the graph will fail atomically. 58 * If an error occurs after the Cmt node executes, the engine will roll-forward 59 * through the graph, blindly executing nodes until it reaches the end. 60 * If a graph reaches the end, it is assumed to have completed successfully. 61 * 62 * A graph has only 1 Cmt node. 63 * 64 */ 65 66 67/****************************************************************************** 68 * 69 * The following wrappers map the standard DAG creation interface to the 70 * DAG creation routines. Additionally, these wrappers enable experimentation 71 * with new DAG structures by providing an extra level of indirection, allowing 72 * the DAG creation routines to be replaced at this single point. 73 */ 74 75void rf_CreateRaidFiveDegradedReadDAG( 76 RF_Raid_t *raidPtr, 77 RF_AccessStripeMap_t *asmap, 78 RF_DagHeader_t *dag_h, 79 void *bp, 80 RF_RaidAccessFlags_t flags, 81 RF_AllocListElem_t *allocList) 82{ 83 rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 84 &rf_xorRecoveryFuncs); 85} 86 87 88/****************************************************************************** 89 * 90 * DAG creation code begins here 91 */ 92 93 94/****************************************************************************** 95 * Create a degraded read DAG for RAID level 1 96 * 97 * Hdr -> Nil -> R(p/s)d -> Commit -> Trm 98 * 99 * The "Rd" node reads data from the surviving disk in the mirror pair 100 * Rpd - read of primary copy 101 * Rsd - read of secondary copy 102 * 103 * Parameters: raidPtr - description of the physical array 104 * asmap - logical & physical addresses for this access 105 * bp - buffer ptr (for holding write data) 106 * flags - general flags (e.g. 
disk locking)
 *              allocList - list of memory allocated in DAG creation
 *****************************************************************************/

void rf_CreateRaidOneDegradedReadDAG(
  RF_Raid_t             *raidPtr,
  RF_AccessStripeMap_t  *asmap,
  RF_DagHeader_t        *dag_h,
  void                  *bp,
  RF_RaidAccessFlags_t   flags,
  RF_AllocListElem_t    *allocList)
{
  RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
  RF_PhysDiskAddr_t *pda, *readPDA;
  RF_StripeNum_t parityStripeID;
  RF_ReconUnitNum_t which_ru;
  char *rdName;
  int useMirror;

  parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
    asmap->raidAddress, &which_ru);
  if (rf_dagDebug) {
    printf("[Creating RAID level 1 degraded read DAG]\n");
  }
  dag_h->creator = "RaidOneDegradedReadDAG";

  /* read the secondary copy only when the data unit itself has failed */
  useMirror = (asmap->numDataFailed == 0) ? RF_FALSE : RF_TRUE;

  /* four nodes in all: the read node plus block, commit and terminator */
  RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
  rdNode     = &nodes[0];
  blockNode  = &nodes[1];
  commitNode = &nodes[2];
  termNode   = &nodes[3];

  /*
   * This dag can not commit until the commit node is reached.  Errors
   * prior to the commit point imply the dag has failed and must be retried.
   */
  dag_h->numCommitNodes = 1;
  dag_h->numCommits = 0;
  dag_h->numSuccedents = 1;

  /* set up the block, commit, and terminator nodes */
  rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
  rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

  pda = asmap->physInfo;
  RF_ASSERT(pda != NULL);
  /* parityInfo must describe entire parity unit */
  RF_ASSERT(asmap->parityInfo->next == NULL);

  /*
   * Pick the disk address to read from.  Either way the data lands in the
   * buffer attached to the data pda.
   */
  if (useMirror) {
    /* read secondary copy of data */
    readPDA = asmap->parityInfo;
    rdName = "Rsd";
  }
  else {
    /* read primary copy of data */
    readPDA = pda;
    rdName = "Rpd";
  }
  rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, rdName, allocList);
  rdNode->params[0].p = readPDA;
  rdNode->params[1].p = pda->bufPtr;
  rdNode->params[2].v = parityStripeID;
  rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);

  /* header -> block node */
  RF_ASSERT(dag_h->numSuccedents == 1);
  RF_ASSERT(blockNode->numAntecedents == 0);
  dag_h->succedents[0] = blockNode;

  /* block node -> read node */
  RF_ASSERT(blockNode->numSuccedents == 1);
  RF_ASSERT(rdNode->numAntecedents == 1);
  blockNode->succedents[0] = rdNode;
  rdNode->antecedents[0] = blockNode;
  rdNode->antType[0] = rf_control;

  /* read node -> commit node */
  RF_ASSERT(rdNode->numSuccedents == 1);
  RF_ASSERT(commitNode->numAntecedents == 1);
  rdNode->succedents[0] = commitNode;
  commitNode->antecedents[0] = rdNode;
  commitNode->antType[0] = rf_control;

  /* commit node -> terminator */
  RF_ASSERT(commitNode->numSuccedents == 1);
  RF_ASSERT(termNode->numAntecedents == 1);
  RF_ASSERT(termNode->numSuccedents == 0);
  commitNode->succedents[0] = termNode;
  termNode->antecedents[0] = commitNode;
  termNode->antType[0] = rf_control;
}



/******************************************************************************
 *
 * creates a DAG to perform a degraded-mode read of data within one stripe.
 * This DAG is as follows:
 *
 * Hdr -> Block -> Rud -> Xor -> Cmt -> T
 *              -> Rrd ->
 *              -> Rp -->
 *
 * Each R node is a successor of the L node
 * One successor arc from each R node goes to C, and the other to X
 * There is one Rud for each chunk of surviving user data requested by the
 * user, and one Rrd for each chunk of surviving user data _not_ being read by
 * the user
 * R = read, ud = user data, rd = recovery (surviving) data, p = parity
 * X = XOR, C = Commit, T = terminate
 *
 * The block node guarantees a single source node.
 *
 * Note:  The target buffer for the XOR node is set to the actual user buffer
 * where the failed data is supposed to end up.  This buffer is zero'd by the
 * code here.  Thus, if you create a degraded read dag, use it, and then
 * re-use, you have to be sure to zero the target buffer prior to the re-use.
 *
 * The recfunc argument at the end specifies the name and function used for
 * the redundancy
 * recovery function.
242 * 243 *****************************************************************************/ 244 245void rf_CreateDegradedReadDAG( 246 RF_Raid_t *raidPtr, 247 RF_AccessStripeMap_t *asmap, 248 RF_DagHeader_t *dag_h, 249 void *bp, 250 RF_RaidAccessFlags_t flags, 251 RF_AllocListElem_t *allocList, 252 RF_RedFuncs_t *recFunc) 253{ 254 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; 255 RF_DagNode_t *commitNode, *rpNode, *termNode; 256 int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; 257 int j, paramNum; 258 RF_SectorCount_t sectorsPerSU; 259 RF_ReconUnitNum_t which_ru; 260 char *overlappingPDAs; /* a temporary array of flags */ 261 RF_AccessStripeMapHeader_t *new_asm_h[2]; 262 RF_PhysDiskAddr_t *pda, *parityPDA; 263 RF_StripeNum_t parityStripeID; 264 RF_PhysDiskAddr_t *failedPDA; 265 RF_RaidLayout_t *layoutPtr; 266 char *rpBuf; 267 268 layoutPtr = &(raidPtr->Layout); 269 /* failedPDA points to the pda within the asm that targets the failed disk */ 270 failedPDA = asmap->failedPDAs[0]; 271 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 272 asmap->raidAddress, &which_ru); 273 sectorsPerSU = layoutPtr->sectorsPerStripeUnit; 274 275 if (rf_dagDebug) { 276 printf("[Creating degraded read DAG]\n"); 277 } 278 279 RF_ASSERT( asmap->numDataFailed == 1 ); 280 dag_h->creator = "DegradedReadDAG"; 281 282 /* 283 * generate two ASMs identifying the surviving data we need 284 * in order to recover the lost data 285 */ 286 287 /* overlappingPDAs array must be zero'd */ 288 RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); 289 rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, 290 &rpBuf, overlappingPDAs, allocList); 291 292 /* 293 * create all the nodes at once 294 * 295 * -1 because no access is generated for the failed pda 296 */ 297 nRudNodes = asmap->numStripeUnitsAccessed-1; 298 nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + 299 ((new_asm_h[1]) ? 
new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); 300 nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, Rrd */ 301 RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), 302 allocList); 303 i = 0; 304 blockNode = &nodes[i]; i++; 305 commitNode = &nodes[i]; i++; 306 xorNode = &nodes[i]; i++; 307 rpNode = &nodes[i]; i++; 308 termNode = &nodes[i]; i++; 309 rudNodes = &nodes[i]; i += nRudNodes; 310 rrdNodes = &nodes[i]; i += nRrdNodes; 311 RF_ASSERT(i == nNodes); 312 313 /* initialize nodes */ 314 dag_h->numCommitNodes = 1; 315 dag_h->numCommits = 0; 316 /* this dag can not commit until the commit node is reached 317 * errors prior to the commit point imply the dag has failed 318 */ 319 dag_h->numSuccedents = 1; 320 321 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 322 NULL, nRudNodes+nRrdNodes+1, 0, 0, 0, dag_h, "Nil", allocList); 323 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 324 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 325 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 326 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 327 rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, 328 NULL, 1, nRudNodes+nRrdNodes+1, 2*nXorBufs+2, 1, dag_h, 329 recFunc->SimpleName, allocList); 330 331 /* fill in the Rud nodes */ 332 for (pda=asmap->physInfo, i=0; i<nRudNodes; i++, pda=pda->next) { 333 if (pda == failedPDA) {i--; continue;} 334 rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, 335 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 336 "Rud", allocList); 337 RF_ASSERT(pda); 338 rudNodes[i].params[0].p = pda; 339 rudNodes[i].params[1].p = pda->bufPtr; 340 rudNodes[i].params[2].v = parityStripeID; 341 rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 342 } 343 344 /* fill in the Rrd nodes */ 345 i = 0; 346 if (new_asm_h[0]) { 347 for 
(pda=new_asm_h[0]->stripeMap->physInfo; 348 i<new_asm_h[0]->stripeMap->numStripeUnitsAccessed; 349 i++, pda=pda->next) 350 { 351 rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, 352 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 353 dag_h, "Rrd", allocList); 354 RF_ASSERT(pda); 355 rrdNodes[i].params[0].p = pda; 356 rrdNodes[i].params[1].p = pda->bufPtr; 357 rrdNodes[i].params[2].v = parityStripeID; 358 rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 359 } 360 } 361 if (new_asm_h[1]) { 362 for (j=0,pda=new_asm_h[1]->stripeMap->physInfo; 363 j<new_asm_h[1]->stripeMap->numStripeUnitsAccessed; 364 j++, pda=pda->next) 365 { 366 rf_InitNode(&rrdNodes[i+j], rf_wait, RF_FALSE, rf_DiskReadFunc, 367 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 368 dag_h, "Rrd", allocList); 369 RF_ASSERT(pda); 370 rrdNodes[i+j].params[0].p = pda; 371 rrdNodes[i+j].params[1].p = pda->bufPtr; 372 rrdNodes[i+j].params[2].v = parityStripeID; 373 rrdNodes[i+j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 374 } 375 } 376 377 /* make a PDA for the parity unit */ 378 RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 379 parityPDA->row = asmap->parityInfo->row; 380 parityPDA->col = asmap->parityInfo->col; 381 parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) 382 * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); 383 parityPDA->numSector = failedPDA->numSector; 384 385 /* initialize the Rp node */ 386 rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 387 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); 388 rpNode->params[0].p = parityPDA; 389 rpNode->params[1].p = rpBuf; 390 rpNode->params[2].v = parityStripeID; 391 rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 392 393 /* 394 * the last and nastiest step is to assign all 395 * the parameters of the Xor node 396 */ 397 
paramNum=0; 398 for (i=0; i<nRrdNodes; i++) { 399 /* all the Rrd nodes need to be xored together */ 400 xorNode->params[paramNum++] = rrdNodes[i].params[0]; 401 xorNode->params[paramNum++] = rrdNodes[i].params[1]; 402 } 403 for (i=0; i<nRudNodes; i++) { 404 /* any Rud nodes that overlap the failed access need to be xored in */ 405 if (overlappingPDAs[i]) { 406 RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 407 bcopy((char *)rudNodes[i].params[0].p, (char *)pda, sizeof(RF_PhysDiskAddr_t)); 408 rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); 409 xorNode->params[paramNum++].p = pda; 410 xorNode->params[paramNum++].p = pda->bufPtr; 411 } 412 } 413 RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); 414 415 /* install parity pda as last set of params to be xor'd */ 416 xorNode->params[paramNum++].p = parityPDA; 417 xorNode->params[paramNum++].p = rpBuf; 418 419 /* 420 * the last 2 params to the recovery xor node are 421 * the failed PDA and the raidPtr 422 */ 423 xorNode->params[paramNum++].p = failedPDA; 424 xorNode->params[paramNum++].p = raidPtr; 425 RF_ASSERT( paramNum == 2*nXorBufs+2 ); 426 427 /* 428 * The xor node uses results[0] as the target buffer. 429 * Set pointer and zero the buffer. In the kernel, this 430 * may be a user buffer in which case we have to remap it. 
431 */ 432 xorNode->results[0] = failedPDA->bufPtr; 433 RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, 434 failedPDA->numSector)); 435 436 /* connect nodes to form graph */ 437 /* connect the header to the block node */ 438 RF_ASSERT(dag_h->numSuccedents == 1); 439 RF_ASSERT(blockNode->numAntecedents == 0); 440 dag_h->succedents[0] = blockNode; 441 442 /* connect the block node to the read nodes */ 443 RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); 444 RF_ASSERT(rpNode->numAntecedents == 1); 445 blockNode->succedents[0] = rpNode; 446 rpNode->antecedents[0] = blockNode; 447 rpNode->antType[0] = rf_control; 448 for (i = 0; i < nRrdNodes; i++) { 449 RF_ASSERT(rrdNodes[i].numSuccedents == 1); 450 blockNode->succedents[1 + i] = &rrdNodes[i]; 451 rrdNodes[i].antecedents[0] = blockNode; 452 rrdNodes[i].antType[0] = rf_control; 453 } 454 for (i = 0; i < nRudNodes; i++) { 455 RF_ASSERT(rudNodes[i].numSuccedents == 1); 456 blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; 457 rudNodes[i].antecedents[0] = blockNode; 458 rudNodes[i].antType[0] = rf_control; 459 } 460 461 /* connect the read nodes to the xor node */ 462 RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); 463 RF_ASSERT(rpNode->numSuccedents == 1); 464 rpNode->succedents[0] = xorNode; 465 xorNode->antecedents[0] = rpNode; 466 xorNode->antType[0] = rf_trueData; 467 for (i = 0; i < nRrdNodes; i++) { 468 RF_ASSERT(rrdNodes[i].numSuccedents == 1); 469 rrdNodes[i].succedents[0] = xorNode; 470 xorNode->antecedents[1 + i] = &rrdNodes[i]; 471 xorNode->antType[1 + i] = rf_trueData; 472 } 473 for (i = 0; i < nRudNodes; i++) { 474 RF_ASSERT(rudNodes[i].numSuccedents == 1); 475 rudNodes[i].succedents[0] = xorNode; 476 xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; 477 xorNode->antType[1 + nRrdNodes + i] = rf_trueData; 478 } 479 480 /* connect the xor node to the commit node */ 481 RF_ASSERT(xorNode->numSuccedents == 1); 482 
RF_ASSERT(commitNode->numAntecedents == 1); 483 xorNode->succedents[0] = commitNode; 484 commitNode->antecedents[0] = xorNode; 485 commitNode->antType[0] = rf_control; 486 487 /* connect the termNode to the commit node */ 488 RF_ASSERT(commitNode->numSuccedents == 1); 489 RF_ASSERT(termNode->numAntecedents == 1); 490 RF_ASSERT(termNode->numSuccedents == 0); 491 commitNode->succedents[0] = termNode; 492 termNode->antType[0] = rf_control; 493 termNode->antecedents[0] = commitNode; 494} 495 496 497/****************************************************************************** 498 * Create a degraded read DAG for Chained Declustering 499 * 500 * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm 501 * 502 * The "Rd" node reads data from the surviving disk in the mirror pair 503 * Rpd - read of primary copy 504 * Rsd - read of secondary copy 505 * 506 * Parameters: raidPtr - description of the physical array 507 * asmap - logical & physical addresses for this access 508 * bp - buffer ptr (for holding write data) 509 * flags - general flags (e.g. 
disk locking) 510 * allocList - list of memory allocated in DAG creation 511 *****************************************************************************/ 512 513void rf_CreateRaidCDegradedReadDAG( 514 RF_Raid_t *raidPtr, 515 RF_AccessStripeMap_t *asmap, 516 RF_DagHeader_t *dag_h, 517 void *bp, 518 RF_RaidAccessFlags_t flags, 519 RF_AllocListElem_t *allocList) 520{ 521 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; 522 RF_StripeNum_t parityStripeID; 523 int useMirror, i, shiftable; 524 RF_ReconUnitNum_t which_ru; 525 RF_PhysDiskAddr_t *pda; 526 527 if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { 528 shiftable = RF_TRUE; 529 } 530 else { 531 shiftable = RF_FALSE; 532 } 533 useMirror = 0; 534 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 535 asmap->raidAddress, &which_ru); 536 537 if (rf_dagDebug) { 538 printf("[Creating RAID C degraded read DAG]\n"); 539 } 540 dag_h->creator = "RaidCDegradedReadDAG"; 541 /* alloc the Wnd nodes and the Wmir node */ 542 if (asmap->numDataFailed == 0) 543 useMirror = RF_FALSE; 544 else 545 useMirror = RF_TRUE; 546 547 /* total number of nodes = 1 + (block + commit + terminator) */ 548 RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 549 i = 0; 550 rdNode = &nodes[i]; i++; 551 blockNode = &nodes[i]; i++; 552 commitNode = &nodes[i]; i++; 553 termNode = &nodes[i]; i++; 554 555 /* 556 * This dag can not commit until the commit node is reached. 557 * Errors prior to the commit point imply the dag has failed 558 * and must be retried. 
559 */ 560 dag_h->numCommitNodes = 1; 561 dag_h->numCommits = 0; 562 dag_h->numSuccedents = 1; 563 564 /* initialize the block, commit, and terminator nodes */ 565 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 566 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 567 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 568 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 569 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 570 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 571 572 pda = asmap->physInfo; 573 RF_ASSERT(pda != NULL); 574 /* parityInfo must describe entire parity unit */ 575 RF_ASSERT(asmap->parityInfo->next == NULL); 576 577 /* initialize the data node */ 578 if (!useMirror) { 579 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 580 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 581 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { 582 /* shift this read to the next disk in line */ 583 rdNode->params[0].p = asmap->parityInfo; 584 rdNode->params[1].p = pda->bufPtr; 585 rdNode->params[2].v = parityStripeID; 586 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 587 } 588 else { 589 /* read primary copy */ 590 rdNode->params[0].p = pda; 591 rdNode->params[1].p = pda->bufPtr; 592 rdNode->params[2].v = parityStripeID; 593 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 594 } 595 } 596 else { 597 /* read secondary copy of data */ 598 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 599 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); 600 rdNode->params[0].p = asmap->parityInfo; 601 rdNode->params[1].p = pda->bufPtr; 602 rdNode->params[2].v = parityStripeID; 603 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 604 } 605 606 /* connect header to block node */ 607 RF_ASSERT(dag_h->numSuccedents == 1); 608 
RF_ASSERT(blockNode->numAntecedents == 0); 609 dag_h->succedents[0] = blockNode; 610 611 /* connect block node to rdnode */ 612 RF_ASSERT(blockNode->numSuccedents == 1); 613 RF_ASSERT(rdNode->numAntecedents == 1); 614 blockNode->succedents[0] = rdNode; 615 rdNode->antecedents[0] = blockNode; 616 rdNode->antType[0] = rf_control; 617 618 /* connect rdnode to commit node */ 619 RF_ASSERT(rdNode->numSuccedents == 1); 620 RF_ASSERT(commitNode->numAntecedents == 1); 621 rdNode->succedents[0] = commitNode; 622 commitNode->antecedents[0] = rdNode; 623 commitNode->antType[0] = rf_control; 624 625 /* connect commit node to terminator */ 626 RF_ASSERT(commitNode->numSuccedents == 1); 627 RF_ASSERT(termNode->numAntecedents == 1); 628 RF_ASSERT(termNode->numSuccedents == 0); 629 commitNode->succedents[0] = termNode; 630 termNode->antecedents[0] = commitNode; 631 termNode->antType[0] = rf_control; 632} 633 634/* 635 * XXX move this elsewhere? 636 */ 637void rf_DD_GenerateFailedAccessASMs( 638 RF_Raid_t *raidPtr, 639 RF_AccessStripeMap_t *asmap, 640 RF_PhysDiskAddr_t **pdap, 641 int *nNodep, 642 RF_PhysDiskAddr_t **pqpdap, 643 int *nPQNodep, 644 RF_AllocListElem_t *allocList) 645{ 646 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 647 int PDAPerDisk,i; 648 RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; 649 int numDataCol = layoutPtr->numDataCol; 650 int state; 651 RF_SectorNum_t suoff, suend; 652 unsigned firstDataCol, napdas, count; 653 RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; 654 RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; 655 RF_PhysDiskAddr_t *pda_p; 656 RF_PhysDiskAddr_t *phys_p; 657 RF_RaidAddr_t sosAddr; 658 659 /* determine how many pda's we will have to generate per unaccess stripe. 660 If there is only one failed data unit, it is one; if two, possibly two, 661 depending wether they overlap. 
*/ 662 663 fone_start = rf_StripeUnitOffset(layoutPtr,fone->startSector); 664 fone_end = fone_start + fone->numSector; 665 666#define CONS_PDA(if,start,num) \ 667 pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ 668 pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ 669 pda_p->numSector = num; \ 670 pda_p->next = NULL; \ 671 RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) 672 673 if (asmap->numDataFailed==1) 674 { 675 PDAPerDisk = 1; 676 state = 1; 677 RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); 678 pda_p = *pqpdap; 679 /* build p */ 680 CONS_PDA(parityInfo,fone_start,fone->numSector); 681 pda_p->type = RF_PDA_TYPE_PARITY; 682 pda_p++; 683 /* build q */ 684 CONS_PDA(qInfo,fone_start,fone->numSector); 685 pda_p->type = RF_PDA_TYPE_Q; 686 } 687 else 688 { 689 ftwo_start = rf_StripeUnitOffset(layoutPtr,ftwo->startSector); 690 ftwo_end = ftwo_start + ftwo->numSector; 691 if (fone->numSector + ftwo->numSector > secPerSU) 692 { 693 PDAPerDisk = 1; 694 state = 2; 695 RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); 696 pda_p = *pqpdap; 697 CONS_PDA(parityInfo,0,secPerSU); 698 pda_p->type = RF_PDA_TYPE_PARITY; 699 pda_p++; 700 CONS_PDA(qInfo,0,secPerSU); 701 pda_p->type = RF_PDA_TYPE_Q; 702 } 703 else 704 { 705 PDAPerDisk = 2; 706 state = 3; 707 /* four of them, fone, then ftwo */ 708 RF_MallocAndAdd(*pqpdap,4*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); 709 pda_p = *pqpdap; 710 CONS_PDA(parityInfo,fone_start,fone->numSector); 711 pda_p->type = RF_PDA_TYPE_PARITY; 712 pda_p++; 713 CONS_PDA(qInfo,fone_start,fone->numSector); 714 pda_p->type = RF_PDA_TYPE_Q; 715 pda_p++; 716 CONS_PDA(parityInfo,ftwo_start,ftwo->numSector); 717 pda_p->type = RF_PDA_TYPE_PARITY; 718 pda_p++; 719 CONS_PDA(qInfo,ftwo_start,ftwo->numSector); 720 pda_p->type = RF_PDA_TYPE_Q; 721 } 722 } 723 /* figure out number of nonaccessed 
pda */ 724 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo==NULL ? 1 : 0)); 725 *nPQNodep = PDAPerDisk; 726 727 /* sweep over the over accessed pda's, figuring out the number of 728 additional pda's to generate. Of course, skip the failed ones */ 729 730 count = 0; 731 for ( pda_p=asmap->physInfo; pda_p; pda_p= pda_p->next) 732 { 733 if ((pda_p == fone) || (pda_p == ftwo)) 734 continue; 735 suoff = rf_StripeUnitOffset(layoutPtr,pda_p->startSector); 736 suend = suoff + pda_p->numSector; 737 switch (state) 738 { 739 case 1: /* one failed PDA to overlap */ 740 /* if a PDA doesn't contain the failed unit, it can 741 only miss the start or end, not both */ 742 if ((suoff > fone_start) || (suend <fone_end)) 743 count++; 744 break; 745 case 2: /* whole stripe */ 746 if (suoff) /* leak at begining */ 747 count++; 748 if (suend < numDataCol) /* leak at end */ 749 count++; 750 break; 751 case 3: /* two disjoint units */ 752 if ((suoff > fone_start) || (suend <fone_end)) 753 count++; 754 if ((suoff > ftwo_start) || (suend <ftwo_end)) 755 count++; 756 break; 757 default: 758 RF_PANIC(); 759 } 760 } 761 762 napdas += count; 763 *nNodep = napdas; 764 if (napdas == 0) return; /* short circuit */ 765 766 /* allocate up our list of pda's */ 767 768 RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 769 *pdap = pda_p; 770 771 /* linkem together */ 772 for (i=0; i < (napdas-1); i++) 773 pda_p[i].next = pda_p+(i+1); 774 775 /* march through the one's up to the first accessed disk */ 776 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),asmap->physInfo->raidAddress) % numDataCol; 777 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 778 for (i=0; i < firstDataCol; i++) 779 { 780 if ((pda_p - (*pdap)) == napdas) 781 continue; 782 pda_p->type = RF_PDA_TYPE_DATA; 783 pda_p->raidAddress = sosAddr + (i * secPerSU); 784 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, 
&(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 785 /* skip over dead disks */ 786 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) 787 continue; 788 switch (state) 789 { 790 case 1: /* fone */ 791 pda_p->numSector = fone->numSector; 792 pda_p->raidAddress += fone_start; 793 pda_p->startSector += fone_start; 794 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 795 break; 796 case 2: /* full stripe */ 797 pda_p->numSector = secPerSU; 798 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList); 799 break; 800 case 3: /* two slabs */ 801 pda_p->numSector = fone->numSector; 802 pda_p->raidAddress += fone_start; 803 pda_p->startSector += fone_start; 804 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 805 pda_p++; 806 pda_p->type = RF_PDA_TYPE_DATA; 807 pda_p->raidAddress = sosAddr + (i * secPerSU); 808 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 809 pda_p->numSector = ftwo->numSector; 810 pda_p->raidAddress += ftwo_start; 811 pda_p->startSector += ftwo_start; 812 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 813 break; 814 default: 815 RF_PANIC(); 816 } 817 pda_p++; 818 } 819 820 /* march through the touched stripe units */ 821 for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) 822 { 823 if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) 824 continue; 825 suoff = rf_StripeUnitOffset(layoutPtr,phys_p->startSector); 826 suend = suoff + phys_p->numSector; 827 switch(state) 828 { 829 case 1: /* single buffer */ 830 if (suoff > fone_start) 831 { 832 RF_ASSERT( suend >= fone_end ); 833 /* The data read starts after the mapped access, 834 snip off the begining */ 835 pda_p->numSector = suoff - fone_start; 836 pda_p->raidAddress = sosAddr + 
(i*secPerSU) + fone_start; 837 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 838 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 839 pda_p++; 840 } 841 if (suend < fone_end) 842 { 843 RF_ASSERT ( suoff <= fone_start); 844 /* The data read stops before the end of the failed access, extend */ 845 pda_p->numSector = fone_end - suend; 846 pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? */ 847 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 848 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 849 pda_p++; 850 } 851 break; 852 case 2: /* whole stripe unit */ 853 RF_ASSERT( (suoff == 0) || (suend == secPerSU)); 854 if (suend < secPerSU) 855 { /* short read, snip from end on */ 856 pda_p->numSector = secPerSU - suend; 857 pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? 
*/ 858 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 859 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 860 pda_p++; 861 } 862 else 863 if (suoff > 0) 864 { /* short at front */ 865 pda_p->numSector = suoff; 866 pda_p->raidAddress = sosAddr + (i*secPerSU); 867 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 868 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 869 pda_p++; 870 } 871 break; 872 case 3: /* two nonoverlapping failures */ 873 if ((suoff > fone_start) || (suend <fone_end)) 874 { 875 if (suoff > fone_start) 876 { 877 RF_ASSERT( suend >= fone_end ); 878 /* The data read starts after the mapped access, 879 snip off the begining */ 880 pda_p->numSector = suoff - fone_start; 881 pda_p->raidAddress = sosAddr + (i*secPerSU) + fone_start; 882 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 883 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 884 pda_p++; 885 } 886 if (suend < fone_end) 887 { 888 RF_ASSERT ( suoff <= fone_start); 889 /* The data read stops before the end of the failed access, extend */ 890 pda_p->numSector = fone_end - suend; 891 pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? 
*/ 892 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 893 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 894 pda_p++; 895 } 896 } 897 if ((suoff > ftwo_start) || (suend <ftwo_end)) 898 { 899 if (suoff > ftwo_start) 900 { 901 RF_ASSERT( suend >= ftwo_end ); 902 /* The data read starts after the mapped access, 903 snip off the begining */ 904 pda_p->numSector = suoff - ftwo_start; 905 pda_p->raidAddress = sosAddr + (i*secPerSU) + ftwo_start; 906 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 907 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 908 pda_p++; 909 } 910 if (suend < ftwo_end) 911 { 912 RF_ASSERT ( suoff <= ftwo_start); 913 /* The data read stops before the end of the failed access, extend */ 914 pda_p->numSector = ftwo_end - suend; 915 pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? 
*/ 916 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 917 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 918 pda_p++; 919 } 920 } 921 break; 922 default: 923 RF_PANIC(); 924 } 925 } 926 927 /* after the last accessed disk */ 928 for (; i < numDataCol; i++ ) 929 { 930 if ((pda_p - (*pdap)) == napdas) 931 continue; 932 pda_p->type = RF_PDA_TYPE_DATA; 933 pda_p->raidAddress = sosAddr + (i * secPerSU); 934 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 935 /* skip over dead disks */ 936 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) 937 continue; 938 switch (state) 939 { 940 case 1: /* fone */ 941 pda_p->numSector = fone->numSector; 942 pda_p->raidAddress += fone_start; 943 pda_p->startSector += fone_start; 944 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 945 break; 946 case 2: /* full stripe */ 947 pda_p->numSector = secPerSU; 948 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList); 949 break; 950 case 3: /* two slabs */ 951 pda_p->numSector = fone->numSector; 952 pda_p->raidAddress += fone_start; 953 pda_p->startSector += fone_start; 954 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 955 pda_p++; 956 pda_p->type = RF_PDA_TYPE_DATA; 957 pda_p->raidAddress = sosAddr + (i * secPerSU); 958 (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); 959 pda_p->numSector = ftwo->numSector; 960 pda_p->raidAddress += ftwo_start; 961 pda_p->startSector += ftwo_start; 962 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); 963 break; 964 default: 965 RF_PANIC(); 966 } 967 pda_p++; 968 } 969 970 RF_ASSERT (pda_p - 
*pdap == napdas); 971 return; 972} 973 974#define INIT_DISK_NODE(node,name) \ 975rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ 976(node)->succedents[0] = unblockNode; \ 977(node)->succedents[1] = recoveryNode; \ 978(node)->antecedents[0] = blockNode; \ 979(node)->antType[0] = rf_control 980 981#define DISK_NODE_PARAMS(_node_,_p_) \ 982 (_node_).params[0].p = _p_ ; \ 983 (_node_).params[1].p = (_p_)->bufPtr; \ 984 (_node_).params[2].v = parityStripeID; \ 985 (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) 986 987void rf_DoubleDegRead( 988 RF_Raid_t *raidPtr, 989 RF_AccessStripeMap_t *asmap, 990 RF_DagHeader_t *dag_h, 991 void *bp, 992 RF_RaidAccessFlags_t flags, 993 RF_AllocListElem_t *allocList, 994 char *redundantReadNodeName, 995 char *recoveryNodeName, 996 int (*recovFunc)(RF_DagNode_t *)) 997{ 998 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 999 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, *unblockNode, *rpNodes, *rqNodes, *termNode; 1000 RF_PhysDiskAddr_t *pda, *pqPDAs; 1001 RF_PhysDiskAddr_t *npdas; 1002 int nNodes, nRrdNodes, nRudNodes, i; 1003 RF_ReconUnitNum_t which_ru; 1004 int nReadNodes, nPQNodes; 1005 RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; 1006 RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; 1007 RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); 1008 1009 if (rf_dagDebug) printf("[Creating Double Degraded Read DAG]\n"); 1010 rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes,allocList); 1011 1012 nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); 1013 nReadNodes = nRrdNodes + nRudNodes + 2*nPQNodes; 1014 nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; 1015 1016 RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 1017 i = 0; 1018 
blockNode = &nodes[i]; i += 1; 1019 unblockNode = &nodes[i]; i += 1; 1020 recoveryNode = &nodes[i]; i += 1; 1021 termNode = &nodes[i]; i += 1; 1022 rudNodes = &nodes[i]; i += nRudNodes; 1023 rrdNodes = &nodes[i]; i += nRrdNodes; 1024 rpNodes = &nodes[i]; i += nPQNodes; 1025 rqNodes = &nodes[i]; i += nPQNodes; 1026 RF_ASSERT(i == nNodes); 1027 1028 dag_h->numSuccedents = 1; 1029 dag_h->succedents[0] = blockNode; 1030 dag_h->creator = "DoubleDegRead"; 1031 dag_h->numCommits = 0; 1032 dag_h->numCommitNodes = 1; /*unblock */ 1033 1034 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); 1035 termNode->antecedents[0] = unblockNode; 1036 termNode->antType[0] = rf_control; 1037 termNode->antecedents[1] = recoveryNode; 1038 termNode->antType[1] = rf_control; 1039 1040 /* init the block and unblock nodes */ 1041 /* The block node has all nodes except itself, unblock and recovery as successors. Similarly for 1042 predecessors of the unblock. */ 1043 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); 1044 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); 1045 1046 for (i=0; i < nReadNodes; i++) 1047 { 1048 blockNode->succedents[i] = rudNodes+i; 1049 unblockNode->antecedents[i] = rudNodes+i; 1050 unblockNode->antType[i] = rf_control; 1051 } 1052 unblockNode->succedents[0] = termNode; 1053 1054 /* The recovery node has all the reads as predecessors, and the term node as successors. It gets a pda as a param 1055 from each of the read nodes plus the raidPtr. 1056 For each failed unit is has a result pda. 
*/ 1057 rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, 1058 1, /* succesors */ 1059 nReadNodes, /* preds */ 1060 nReadNodes+2, /* params */ 1061 asmap->numDataFailed, /* results */ 1062 dag_h, recoveryNodeName, allocList); 1063 1064 recoveryNode->succedents[0] = termNode; 1065 for (i=0; i < nReadNodes; i++) { 1066 recoveryNode->antecedents[i] = rudNodes+i; 1067 recoveryNode->antType[i] = rf_trueData; 1068 } 1069 1070 /* build the read nodes, then come back and fill in recovery params and results */ 1071 pda = asmap->physInfo; 1072 for (i=0; i < nRudNodes; pda = pda->next) 1073 { 1074 if ((pda == failedPDA) || (pda == failedPDAtwo)) 1075 continue; 1076 INIT_DISK_NODE(rudNodes+i,"Rud"); 1077 RF_ASSERT(pda); 1078 DISK_NODE_PARAMS(rudNodes[i],pda); 1079 i++; 1080 } 1081 1082 pda = npdas; 1083 for (i=0; i < nRrdNodes; i++, pda = pda->next) 1084 { 1085 INIT_DISK_NODE(rrdNodes+i,"Rrd"); 1086 RF_ASSERT(pda); 1087 DISK_NODE_PARAMS(rrdNodes[i],pda); 1088 } 1089 1090 /* redundancy pdas */ 1091 pda = pqPDAs; 1092 INIT_DISK_NODE(rpNodes,"Rp"); 1093 RF_ASSERT(pda); 1094 DISK_NODE_PARAMS(rpNodes[0],pda); 1095 pda++; 1096 INIT_DISK_NODE(rqNodes,redundantReadNodeName ); 1097 RF_ASSERT(pda); 1098 DISK_NODE_PARAMS(rqNodes[0],pda); 1099 if (nPQNodes==2) 1100 { 1101 pda++; 1102 INIT_DISK_NODE(rpNodes+1,"Rp"); 1103 RF_ASSERT(pda); 1104 DISK_NODE_PARAMS(rpNodes[1],pda); 1105 pda++; 1106 INIT_DISK_NODE( rqNodes+1,redundantReadNodeName ); 1107 RF_ASSERT(pda); 1108 DISK_NODE_PARAMS(rqNodes[1],pda); 1109 } 1110 1111 /* fill in recovery node params */ 1112 for (i=0; i < nReadNodes; i++) 1113 recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ 1114 recoveryNode->params[i++].p = (void *) raidPtr; 1115 recoveryNode->params[i++].p = (void *) asmap; 1116 recoveryNode->results[0] = failedPDA; 1117 if (asmap->numDataFailed ==2 ) 1118 recoveryNode->results[1] = failedPDAtwo; 1119 1120 /* zero fill the target data buffers? */ 1121} 1122