/*	$NetBSD: rf_dagdegrd.c,v 1.11 2002/05/22 15:40:47 wiz Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * rf_dagdegrd.c
 *
 * code for creating degraded read DAGs
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_dagdegrd.c,v 1.11 2002/05/22 15:40:47 wiz Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_archs.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_debugMem.h"
#include "rf_memchunk.h"
#include "rf_general.h"
#include "rf_dagdegrd.h"


/******************************************************************************
 *
 * General comments on DAG creation:
 *
 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node are undoable and atomic - or -
 * does not make changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll-forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */


/******************************************************************************
 *
 * The following wrappers map the standard DAG creation interface to the
 * DAG creation routines.  Additionally, these wrappers enable experimentation
 * with new DAG structures by providing an extra level of indirection, allowing
 * the DAG creation routines to be replaced at this single point.
 */

/*
 * Standard-interface wrapper: a RAID-5 degraded read is the generic
 * degraded read DAG with the XOR recovery functions plugged in.
 */
void
rf_CreateRaidFiveDegradedReadDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList)
{
	rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
	    &rf_xorRecoveryFuncs);
}


/******************************************************************************
 *
 * DAG creation code begins here
 */


/******************************************************************************
 * Create a degraded read DAG for RAID level 1
 *
 * Hdr -> Nil -> R(p/s)d -> Commit -> Trm
 *
 * The "Rd" node reads data from the surviving disk in the mirror pair
 *    Rpd - read of primary copy
 *    Rsd - read of secondary copy
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (for holding write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *****************************************************************************/

void
rf_CreateRaidOneDegradedReadDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList)
{
	RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
	RF_StripeNum_t parityStripeID;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda;
	int     useMirror, i;

	useMirror = 0;
	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 degraded read DAG]\n");
	}
	dag_h->creator = "RaidOneDegradedReadDAG";
	/* read the mirror (secondary) copy iff the primary data disk failed */
	if (asmap->numDataFailed == 0)
		useMirror = RF_FALSE;
	else
		useMirror = RF_TRUE;

	/* total number of nodes = 1 (read) + (block + commit + terminator) */
	RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	i = 0;
	rdNode = &nodes[i];
	i++;
	blockNode = &nodes[i];
	i++;
	commitNode = &nodes[i];
	i++;
	termNode = &nodes[i];
	i++;

	/* this dag can not commit until the commit node is reached.   errors
	 * prior to the commit point imply the dag has failed and must be
	 * retried */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the block, commit, and terminator nodes */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

	pda = asmap->physInfo;
	RF_ASSERT(pda != NULL);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

	/* initialize the data node; disk-read params are: pda, buffer,
	 * parity stripe ID, and the packed priority/which_ru word */
	if (!useMirror) {
		/* read primary copy of data */
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
		rdNode->params[0].p = pda;
		rdNode->params[1].p = pda->bufPtr;
		rdNode->params[2].v = parityStripeID;
		rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
	} else {
		/* read secondary copy of data (the mirror is addressed by
		 * parityInfo) into the same user buffer */
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
		rdNode->params[0].p = asmap->parityInfo;
		rdNode->params[1].p = pda->bufPtr;
		rdNode->params[2].v = parityStripeID;
		rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
	}

	/* connect header to block node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* connect block node to rdnode */
	RF_ASSERT(blockNode->numSuccedents == 1);
	RF_ASSERT(rdNode->numAntecedents == 1);
	blockNode->succedents[0] = rdNode;
	rdNode->antecedents[0] = blockNode;
	rdNode->antType[0] = rf_control;

	/* connect rdnode to commit node */
	RF_ASSERT(rdNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	rdNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = rdNode;
	commitNode->antType[0] = rf_control;

	/* connect commit node to terminator */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antecedents[0] = commitNode;
	termNode->antType[0] = rf_control;
}



/******************************************************************************
 *
 * creates a DAG to perform a degraded-mode read of data within one stripe.
 * This DAG is as follows:
 *
 * Hdr -> Block -> Rud -> Xor -> Cmt -> T
 *              -> Rrd ->
 *              -> Rp -->
 *
 * Each R node is a successor of the L node
 * One successor arc from each R node goes to C, and the other to X
 * There is one Rud for each chunk of surviving user data requested by the
 * user, and one Rrd for each chunk of surviving user data _not_ being read by
 * the user
 * R = read, ud = user data, rd = recovery (surviving) data, p = parity
 * X = XOR, C = Commit, T = terminate
 *
 * The block node guarantees a single source node.
 *
 * Note:  The target buffer for the XOR node is set to the actual user buffer
 * where the failed data is supposed to end up.  This buffer is zero'd by the
 * code here.
Thus, if you create a degraded read dag, use it, and then
 * re-use, you have to be sure to zero the target buffer prior to the re-use.
 *
 * The recfunc argument at the end specifies the name and function used for
 * the redundancy
 * recovery function.
 *
 *****************************************************************************/

void
rf_CreateDegradedReadDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList,
    RF_RedFuncs_t * recFunc)
{
	RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode;
	RF_DagNode_t *commitNode, *rpNode, *termNode;
	int     nNodes, nRrdNodes, nRudNodes, nXorBufs, i;
	int     j, paramNum;
	RF_SectorCount_t sectorsPerSU;
	RF_ReconUnitNum_t which_ru;
	char   *overlappingPDAs;/* a temporary array of flags */
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_PhysDiskAddr_t *pda, *parityPDA;
	RF_StripeNum_t parityStripeID;
	RF_PhysDiskAddr_t *failedPDA;
	RF_RaidLayout_t *layoutPtr;
	char   *rpBuf;

	layoutPtr = &(raidPtr->Layout);
	/* failedPDA points to the pda within the asm that targets the failed
	 * disk */
	failedPDA = asmap->failedPDAs[0];
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
	    asmap->raidAddress, &which_ru);
	sectorsPerSU = layoutPtr->sectorsPerStripeUnit;

	if (rf_dagDebug) {
		printf("[Creating degraded read DAG]\n");
	}
	/* this DAG recovers exactly one failed data unit */
	RF_ASSERT(asmap->numDataFailed == 1);
	dag_h->creator = "DegradedReadDAG";

	/*
	 * generate two ASMs identifying the surviving data we need
	 * in order to recover the lost data
	 */

	/* overlappingPDAs array must be zero'd */
	RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *));
	rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs,
	    &rpBuf, overlappingPDAs, allocList);

	/*
	 * create all the nodes at once
	 *
	 * -1 because no access is generated for the failed pda
	 */
	nRudNodes = asmap->numStripeUnitsAccessed - 1;
	nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
	    ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
	nNodes = 5 + nRudNodes + nRrdNodes;	/* block, commit, xor, Rp,
						 * term, plus the Rud/Rrd
						 * read nodes */
	RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
	    allocList);
	i = 0;
	blockNode = &nodes[i];
	i++;
	commitNode = &nodes[i];
	i++;
	xorNode = &nodes[i];
	i++;
	rpNode = &nodes[i];
	i++;
	termNode = &nodes[i];
	i++;
	rudNodes = &nodes[i];
	i += nRudNodes;
	rrdNodes = &nodes[i];
	i += nRrdNodes;
	RF_ASSERT(i == nNodes);

	/* initialize nodes */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	/* this dag can not commit until the commit node is reached errors
	 * prior to the commit point imply the dag has failed */
	dag_h->numSuccedents = 1;

	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
	/* the xor node takes 2 params per buffer (pda + buf) plus the
	 * trailing failedPDA and raidPtr params */
	rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc,
	    NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h,
	    recFunc->SimpleName, allocList);

	/* fill in the Rud nodes */
	for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) {
		if (pda == failedPDA) {
			/* no read is issued for the failed unit */
			i--;
			continue;
		}
		rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
		    rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
		    "Rud", allocList);
		RF_ASSERT(pda);
		rudNodes[i].params[0].p = pda;
		rudNodes[i].params[1].p = pda->bufPtr;
		rudNodes[i].params[2].v = parityStripeID;
		rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
	}

	/* fill in the Rrd nodes */
	i = 0;
	if (new_asm_h[0]) {
		for (pda = new_asm_h[0]->stripeMap->physInfo;
		    i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
		    i++, pda = pda->next) {
			rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
			    rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			rrdNodes[i].params[0].p = pda;
			rrdNodes[i].params[1].p = pda->bufPtr;
			rrdNodes[i].params[2].v = parityStripeID;
			rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
		}
	}
	if (new_asm_h[1]) {
		/* continue filling in after the new_asm_h[0] entries;
		 * i retains the count from the loop above */
		for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
		    j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
		    j++, pda = pda->next) {
			rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc,
			    rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			rrdNodes[i + j].params[0].p = pda;
			rrdNodes[i + j].params[1].p = pda->bufPtr;
			rrdNodes[i + j].params[2].v = parityStripeID;
			rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
		}
	}
	/* make a PDA for the parity unit, restricted to the same region of
	 * the stripe unit as the failed data */
	RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
	parityPDA->row = asmap->parityInfo->row;
	parityPDA->col = asmap->parityInfo->col;
	parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
	    * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
	parityPDA->numSector = failedPDA->numSector;

	/* initialize the Rp node */
	rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
	    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList);
	rpNode->params[0].p = parityPDA;
	rpNode->params[1].p = rpBuf;
	rpNode->params[2].v = parityStripeID;
	rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);

	/*
	 * the last and nastiest step is to assign all
	 * the parameters of the Xor node
	 */
	paramNum = 0;
	for (i = 0; i < nRrdNodes; i++) {
		/* all the Rrd nodes need to be xored together */
		xorNode->params[paramNum++] = rrdNodes[i].params[0];
		xorNode->params[paramNum++] = rrdNodes[i].params[1];
	}
	for (i = 0; i < nRudNodes; i++) {
		/* any Rud nodes that overlap the failed access need to be
		 * xored in; clone the pda and range-restrict it to the
		 * overlapping region */
		if (overlappingPDAs[i]) {
			RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
			memcpy((char *) pda, (char *) rudNodes[i].params[0].p, sizeof(RF_PhysDiskAddr_t));
			rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
			xorNode->params[paramNum++].p = pda;
			xorNode->params[paramNum++].p = pda->bufPtr;
		}
	}
	RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));

	/* install parity pda as last set of params to be xor'd */
	xorNode->params[paramNum++].p = parityPDA;
	xorNode->params[paramNum++].p = rpBuf;

	/*
	 * the last 2 params to the recovery xor node are
	 * the failed PDA and the raidPtr
	 */
	xorNode->params[paramNum++].p = failedPDA;
	xorNode->params[paramNum++].p = raidPtr;
	RF_ASSERT(paramNum == 2 * nXorBufs + 2);

	/*
	 * The xor node uses results[0] as the target buffer.
	 * Set pointer and zero the buffer. In the kernel, this
	 * may be a user buffer in which case we have to remap it.
	 */
	xorNode->results[0] = failedPDA->bufPtr;
	RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr,
	    failedPDA->numSector));

	/* connect nodes to form graph */
	/* connect the header to the block node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* connect the block node to the read nodes */
	RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
	RF_ASSERT(rpNode->numAntecedents == 1);
	blockNode->succedents[0] = rpNode;
	rpNode->antecedents[0] = blockNode;
	rpNode->antType[0] = rf_control;
	for (i = 0; i < nRrdNodes; i++) {
		RF_ASSERT(rrdNodes[i].numSuccedents == 1);
		blockNode->succedents[1 + i] = &rrdNodes[i];
		rrdNodes[i].antecedents[0] = blockNode;
		rrdNodes[i].antType[0] = rf_control;
	}
	for (i = 0; i < nRudNodes; i++) {
		RF_ASSERT(rudNodes[i].numSuccedents == 1);
		blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i];
		rudNodes[i].antecedents[0] = blockNode;
		rudNodes[i].antType[0] = rf_control;
	}

	/* connect the read nodes to the xor node */
	RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
	RF_ASSERT(rpNode->numSuccedents == 1);
	rpNode->succedents[0] = xorNode;
	xorNode->antecedents[0] = rpNode;
	xorNode->antType[0] = rf_trueData;
	for (i = 0; i < nRrdNodes; i++) {
		RF_ASSERT(rrdNodes[i].numSuccedents == 1);
		rrdNodes[i].succedents[0] = xorNode;
		xorNode->antecedents[1 + i] = &rrdNodes[i];
		xorNode->antType[1 + i] = rf_trueData;
	}
	for (i = 0; i < nRudNodes; i++) {
		RF_ASSERT(rudNodes[i].numSuccedents == 1);
		rudNodes[i].succedents[0] = xorNode;
		xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i];
		xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
	}

	/* connect the xor node to the commit node */
	RF_ASSERT(xorNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	xorNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = xorNode;
	commitNode->antType[0] = rf_control;

	/* connect the termNode to the commit node */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antType[0] = rf_control;
	termNode->antecedents[0] = commitNode;
}

#if (RF_INCLUDE_CHAINDECLUSTER > 0)
/******************************************************************************
 * Create a degraded read DAG for Chained Declustering
 *
 * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm
 *
 * The "Rd" node reads data from the surviving disk in the mirror pair
 *    Rpd - read of primary copy
 *    Rsd - read of secondary copy
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (for holding write data)
 *              flags     - general flags (e.g.
disk locking)
 *              allocList - list of memory allocated in DAG creation
 *****************************************************************************/

void
rf_CreateRaidCDegradedReadDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList)
{
	RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
	RF_StripeNum_t parityStripeID;
	int     useMirror, i, shiftable;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda;

	/* the read may only be shifted to the next disk when neither copy
	 * in the pair has failed */
	if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
		shiftable = RF_TRUE;
	} else {
		shiftable = RF_FALSE;
	}
	useMirror = 0;
	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);

	if (rf_dagDebug) {
		printf("[Creating RAID C degraded read DAG]\n");
	}
	dag_h->creator = "RaidCDegradedReadDAG";
	/* read the mirror (secondary) copy iff the primary data disk failed */
	if (asmap->numDataFailed == 0)
		useMirror = RF_FALSE;
	else
		useMirror = RF_TRUE;

	/* total number of nodes = 1 (read) + (block + commit + terminator) */
	RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	i = 0;
	rdNode = &nodes[i];
	i++;
	blockNode = &nodes[i];
	i++;
	commitNode = &nodes[i];
	i++;
	termNode = &nodes[i];
	i++;

	/*
	 * This dag can not commit until the commit node is reached.
	 * Errors prior to the commit point imply the dag has failed
	 * and must be retried.
	 */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the block, commit, and terminator nodes */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

	pda = asmap->physInfo;
	RF_ASSERT(pda != NULL);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

	/* initialize the data node */
	if (!useMirror) {
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
		if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
			/* shift this read to the next disk in line */
			rdNode->params[0].p = asmap->parityInfo;
			rdNode->params[1].p = pda->bufPtr;
			rdNode->params[2].v = parityStripeID;
			rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
		} else {
			/* read primary copy */
			rdNode->params[0].p = pda;
			rdNode->params[1].p = pda->bufPtr;
			rdNode->params[2].v = parityStripeID;
			rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
		}
	} else {
		/* read secondary copy of data */
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
		rdNode->params[0].p = asmap->parityInfo;
		rdNode->params[1].p = pda->bufPtr;
		rdNode->params[2].v = parityStripeID;
		rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
	}

	/* connect header to block node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* connect block node to rdnode */
	RF_ASSERT(blockNode->numSuccedents == 1);
	RF_ASSERT(rdNode->numAntecedents == 1);
	blockNode->succedents[0] = rdNode;
	rdNode->antecedents[0] = blockNode;
	rdNode->antType[0] = rf_control;

	/* connect rdnode to commit node */
	RF_ASSERT(rdNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	rdNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = rdNode;
	commitNode->antType[0] = rf_control;

	/* connect commit node to terminator */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antecedents[0] = commitNode;
	termNode->antType[0] = rf_control;
}
#endif				/* (RF_INCLUDE_CHAINDECLUSTER > 0) */

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
/*
 * XXX move this elsewhere?
 */
void
rf_DD_GenerateFailedAccessASMs(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_PhysDiskAddr_t ** pdap,
    int *nNodep,
    RF_PhysDiskAddr_t ** pqpdap,
    int *nPQNodep,
    RF_AllocListElem_t * allocList)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	int     PDAPerDisk, i;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int     numDataCol = layoutPtr->numDataCol;
	int     state;
	RF_SectorNum_t suoff, suend;
	unsigned firstDataCol, napdas, count;
	RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
	RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
	RF_PhysDiskAddr_t *pda_p;
	RF_PhysDiskAddr_t *phys_p;
	RF_RaidAddr_t sosAddr;

	/* determine how many pda's we will have to generate per unaccess
	 * stripe.
If there is only one failed data unit, it is one; if two, 683 * possibly two, depending wether they overlap. */ 684 685 fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); 686 fone_end = fone_start + fone->numSector; 687 688#define CONS_PDA(if,start,num) \ 689 pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ 690 pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ 691 pda_p->numSector = num; \ 692 pda_p->next = NULL; \ 693 RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) 694 695 if (asmap->numDataFailed == 1) { 696 PDAPerDisk = 1; 697 state = 1; 698 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 699 pda_p = *pqpdap; 700 /* build p */ 701 CONS_PDA(parityInfo, fone_start, fone->numSector); 702 pda_p->type = RF_PDA_TYPE_PARITY; 703 pda_p++; 704 /* build q */ 705 CONS_PDA(qInfo, fone_start, fone->numSector); 706 pda_p->type = RF_PDA_TYPE_Q; 707 } else { 708 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); 709 ftwo_end = ftwo_start + ftwo->numSector; 710 if (fone->numSector + ftwo->numSector > secPerSU) { 711 PDAPerDisk = 1; 712 state = 2; 713 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 714 pda_p = *pqpdap; 715 CONS_PDA(parityInfo, 0, secPerSU); 716 pda_p->type = RF_PDA_TYPE_PARITY; 717 pda_p++; 718 CONS_PDA(qInfo, 0, secPerSU); 719 pda_p->type = RF_PDA_TYPE_Q; 720 } else { 721 PDAPerDisk = 2; 722 state = 3; 723 /* four of them, fone, then ftwo */ 724 RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 725 pda_p = *pqpdap; 726 CONS_PDA(parityInfo, fone_start, fone->numSector); 727 pda_p->type = RF_PDA_TYPE_PARITY; 728 pda_p++; 729 CONS_PDA(qInfo, fone_start, fone->numSector); 730 pda_p->type = RF_PDA_TYPE_Q; 731 pda_p++; 732 CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); 733 pda_p->type = RF_PDA_TYPE_PARITY; 734 pda_p++; 735 
CONS_PDA(qInfo, ftwo_start, ftwo->numSector); 736 pda_p->type = RF_PDA_TYPE_Q; 737 } 738 } 739 /* figure out number of nonaccessed pda */ 740 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); 741 *nPQNodep = PDAPerDisk; 742 743 /* sweep over the over accessed pda's, figuring out the number of 744 * additional pda's to generate. Of course, skip the failed ones */ 745 746 count = 0; 747 for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { 748 if ((pda_p == fone) || (pda_p == ftwo)) 749 continue; 750 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); 751 suend = suoff + pda_p->numSector; 752 switch (state) { 753 case 1: /* one failed PDA to overlap */ 754 /* if a PDA doesn't contain the failed unit, it can 755 * only miss the start or end, not both */ 756 if ((suoff > fone_start) || (suend < fone_end)) 757 count++; 758 break; 759 case 2: /* whole stripe */ 760 if (suoff) /* leak at begining */ 761 count++; 762 if (suend < numDataCol) /* leak at end */ 763 count++; 764 break; 765 case 3: /* two disjoint units */ 766 if ((suoff > fone_start) || (suend < fone_end)) 767 count++; 768 if ((suoff > ftwo_start) || (suend < ftwo_end)) 769 count++; 770 break; 771 default: 772 RF_PANIC(); 773 } 774 } 775 776 napdas += count; 777 *nNodep = napdas; 778 if (napdas == 0) 779 return; /* short circuit */ 780 781 /* allocate up our list of pda's */ 782 783 RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 784 *pdap = pda_p; 785 786 /* linkem together */ 787 for (i = 0; i < (napdas - 1); i++) 788 pda_p[i].next = pda_p + (i + 1); 789 790 /* march through the one's up to the first accessed disk */ 791 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; 792 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 793 for (i = 0; i < firstDataCol; i++) { 794 if ((pda_p - (*pdap)) == napdas) 795 continue; 796 
		/* Initialize this stripe unit's PDA and map its raid address
		 * to a physical (row, col, sector) location. */
		pda_p->type = RF_PDA_TYPE_DATA;
		pda_p->raidAddress = sosAddr + (i * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
		/* skip over dead disks */
		if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
			continue;
		/* "state" encodes the shape of the failed access (inferred
		 * from the case bodies): 1 = a single failed region (fone),
		 * 2 = a full stripe unit, 3 = two disjoint failed regions
		 * (fone and ftwo).  NOTE(review): confirm against the state
		 * assignment earlier in this function, above this excerpt. */
		switch (state) {
		case 1:	/* fone */
			pda_p->numSector = fone->numSector;
			pda_p->raidAddress += fone_start;
			pda_p->startSector += fone_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			break;
		case 2:	/* full stripe */
			pda_p->numSector = secPerSU;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
			break;
		case 3:	/* two slabs */
			/* Two separate PDAs for this stripe unit: one
			 * congruent to each of the two failed regions. */
			pda_p->numSector = fone->numSector;
			pda_p->raidAddress += fone_start;
			pda_p->startSector += fone_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			pda_p++;
			pda_p->type = RF_PDA_TYPE_DATA;
			pda_p->raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
			pda_p->numSector = ftwo->numSector;
			pda_p->raidAddress += ftwo_start;
			pda_p->startSector += ftwo_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			break;
		default:
			RF_PANIC();
		}
		pda_p++;
	}

	/* march through the touched stripe units: for each stripe unit the
	 * user access already reads, allocate extra read PDAs only for the
	 * portions of the failed region(s) that the access does NOT cover. */
	for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) {
		/* failed units are handled elsewhere; skip them here */
		if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1]))
			continue;
		/* [suoff, suend) is the accessed range within this stripe
		 * unit, in sectors relative to the stripe unit start */
		suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector);
		suend = suoff + phys_p->numSector;
		switch (state) {
		case 1:	/* single buffer */
			if (suoff > fone_start) {
				RF_ASSERT(suend >= fone_end);
				/* The data read starts after the mapped
				 * access, snip off the beginning */
				pda_p->numSector = suoff - fone_start;
				pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start;
				(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
				RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
				pda_p++;
			}
			if (suend < fone_end) {
				RF_ASSERT(suoff <= fone_start);
				/* The data read stops before the end of the
				 * failed access, extend */
				pda_p->numSector = fone_end - suend;
				pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;	/* off by one? */
				(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
				RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
				pda_p++;
			}
			break;
		case 2:	/* whole stripe unit */
			/* the access must touch one end of the stripe unit */
			RF_ASSERT((suoff == 0) || (suend == secPerSU));
			if (suend < secPerSU) {	/* short read, snip from end
						 * on */
				pda_p->numSector = secPerSU - suend;
				pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;	/* off by one? */
				(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
				RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
				pda_p++;
			} else
				if (suoff > 0) {	/* short at front */
					pda_p->numSector = suoff;
					pda_p->raidAddress = sosAddr + (i * secPerSU);
					(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
					RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
					pda_p++;
				}
			break;
		case 3:	/* two nonoverlapping failures: apply the case-1
			 * snip/extend logic independently to each failed
			 * region */
			if ((suoff > fone_start) || (suend < fone_end)) {
				if (suoff > fone_start) {
					RF_ASSERT(suend >= fone_end);
					/* The data read starts after the
					 * mapped access, snip off the
					 * beginning */
					pda_p->numSector = suoff - fone_start;
					pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start;
					(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
					RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
					pda_p++;
				}
				if (suend < fone_end) {
					RF_ASSERT(suoff <= fone_start);
					/* The data read stops before the end
					 * of the failed access, extend */
					pda_p->numSector = fone_end - suend;
					pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;	/* off by one? */
					(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
					RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
					pda_p++;
				}
			}
			if ((suoff > ftwo_start) || (suend < ftwo_end)) {
				if (suoff > ftwo_start) {
					RF_ASSERT(suend >= ftwo_end);
					/* The data read starts after the
					 * mapped access, snip off the
					 * beginning */
					pda_p->numSector = suoff - ftwo_start;
					pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start;
					(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
					RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
					pda_p++;
				}
				if (suend < ftwo_end) {
					RF_ASSERT(suoff <= ftwo_start);
					/* The data read stops before the end
					 * of the failed access, extend */
					pda_p->numSector = ftwo_end - suend;
					pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;	/* off by one? */
					(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
					RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
					pda_p++;
				}
			}
			break;
		default:
			RF_PANIC();
		}
	}

	/* after the last accessed disk: same per-stripe-unit expansion as in
	 * the loop preceding the accessed range above */
	for (; i < numDataCol; i++) {
		/* stop once every planned PDA has been filled in */
		if ((pda_p - (*pdap)) == napdas)
			continue;
		pda_p->type = RF_PDA_TYPE_DATA;
		pda_p->raidAddress = sosAddr + (i * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
		/* skip over dead disks */
		if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
			continue;
		switch (state) {
		case 1:	/* fone */
			pda_p->numSector = fone->numSector;
			pda_p->raidAddress += fone_start;
			pda_p->startSector += fone_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			break;
		case 2:	/* full stripe */
			pda_p->numSector = secPerSU;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
			break;
		case 3:	/* two slabs */
			pda_p->numSector = fone->numSector;
			pda_p->raidAddress += fone_start;
			pda_p->startSector += fone_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			pda_p++;
			pda_p->type = RF_PDA_TYPE_DATA;
			pda_p->raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
			pda_p->numSector = ftwo->numSector;
			pda_p->raidAddress += ftwo_start;
			pda_p->startSector += ftwo_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			break;
		default:
			RF_PANIC();
		}
		pda_p++;
	}

	/* we must have produced exactly the number of PDAs we planned for */
	RF_ASSERT(pda_p -
	    *pdap == napdas);
	return;
}
/*
 * INIT_DISK_NODE: initialize a disk-read DAG node (2 successors, 1
 * antecedent, 4 params, 0 results) and wire it into the surrounding graph:
 * successors are the unblock and recovery nodes, antecedent is the block
 * node.  Note this macro captures blockNode/unblockNode/recoveryNode and
 * dag_h/allocList from the caller's scope.
 */
#define INIT_DISK_NODE(node,name) \
rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \
(node)->succedents[0] = unblockNode; \
(node)->succedents[1] = recoveryNode; \
(node)->antecedents[0] = blockNode; \
(node)->antType[0] = rf_control

/*
 * DISK_NODE_PARAMS: fill the standard 4-entry parameter list of a disk-read
 * node: the pda, its buffer, the parity stripe ID, and the encoded
 * priority/reconstruction-unit tag.  Captures parityStripeID and which_ru
 * from the caller's scope.
 */
#define DISK_NODE_PARAMS(_node_,_p_) \
  (_node_).params[0].p = _p_ ; \
  (_node_).params[1].p = (_p_)->bufPtr; \
  (_node_).params[2].v = parityStripeID; \
  (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru)

/*
 * rf_DoubleDegRead: build a double-degraded read DAG.
 *
 * Graph shape (from the wiring below):
 *     Hdr -> Block -> {Rud, Rrd, Rp, Rq reads} -> Unblock (commit) -> Term
 *                                             \-> Recovery ----------> Term
 * Every read node feeds both the unblock node and the recovery node; the
 * recovery node executes recovFunc with each read's pda plus raidPtr and
 * asmap as params, and has one result pda per failed data unit.
 *
 * redundantReadNodeName/recoveryNodeName let callers label the Rq and
 * recovery nodes; recovFunc supplies the actual reconstruction computation.
 * NOTE(review): bp and flags are accepted but not referenced in this body.
 */
void
rf_DoubleDegRead(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList,
    char *redundantReadNodeName,
    char *recoveryNodeName,
    int (*recovFunc) (RF_DagNode_t *))
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
	       *unblockNode, *rpNodes, *rqNodes, *termNode;
	RF_PhysDiskAddr_t *pda, *pqPDAs;
	RF_PhysDiskAddr_t *npdas;
	int     nNodes, nRrdNodes, nRudNodes, i;
	RF_ReconUnitNum_t which_ru;
	int     nReadNodes, nPQNodes;
	RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
	RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
	RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);

	if (rf_dagDebug)
		printf("[Creating Double Degraded Read DAG]\n");
	/* compute the extra (recovery) read PDAs and the P/Q read PDAs */
	rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList);

	nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
	nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
	nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes;

	/* carve one contiguous node array into the sub-arrays below; the
	 * read nodes (rud, rrd, rp, rq) being contiguous is relied upon
	 * later when indexing them all off rudNodes */
	RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	i = 0;
	blockNode = &nodes[i];
	i += 1;
	unblockNode = &nodes[i];
	i += 1;
	recoveryNode = &nodes[i];
	i += 1;
	termNode = &nodes[i];
	i += 1;
	rudNodes = &nodes[i];
	i += nRudNodes;
	rrdNodes = &nodes[i];
	i += nRrdNodes;
	rpNodes = &nodes[i];
	i += nPQNodes;
	rqNodes = &nodes[i];
	i += nPQNodes;
	RF_ASSERT(i == nNodes);

	dag_h->numSuccedents = 1;
	dag_h->succedents[0] = blockNode;
	dag_h->creator = "DoubleDegRead";
	dag_h->numCommits = 0;
	dag_h->numCommitNodes = 1;	/* unblock */

	/* term node waits on both the unblock and the recovery node */
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
	termNode->antecedents[1] = recoveryNode;
	termNode->antType[1] = rf_control;

	/* init the block and unblock nodes */
	/* The block node has all nodes except itself, unblock and recovery as
	 * successors. Similarly for predecessors of the unblock. */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList);

	/* rudNodes is the first of the contiguous read nodes, so
	 * rudNodes + i walks all nReadNodes of them (rud, rrd, rp, rq) */
	for (i = 0; i < nReadNodes; i++) {
		blockNode->succedents[i] = rudNodes + i;
		unblockNode->antecedents[i] = rudNodes + i;
		unblockNode->antType[i] = rf_control;
	}
	unblockNode->succedents[0] = termNode;

	/* The recovery node has all the reads as predecessors, and the term
	 * node as successors. It gets a pda as a param from each of the read
	 * nodes plus the raidPtr. For each failed unit it has a result pda. */
	rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
	    1,			/* successors */
	    nReadNodes,		/* preds */
	    nReadNodes + 2,	/* params */
	    asmap->numDataFailed,	/* results */
	    dag_h, recoveryNodeName, allocList);

	recoveryNode->succedents[0] = termNode;
	for (i = 0; i < nReadNodes; i++) {
		recoveryNode->antecedents[i] = rudNodes + i;
		recoveryNode->antType[i] = rf_trueData;
	}

	/* build the read nodes, then come back and fill in recovery params
	 * and results */
	/* user-data reads: walk the user's PDA list, skipping the failed
	 * units; i only advances for PDAs that get a node */
	pda = asmap->physInfo;
	for (i = 0; i < nRudNodes; pda = pda->next) {
		if ((pda == failedPDA) || (pda == failedPDAtwo))
			continue;
		INIT_DISK_NODE(rudNodes + i, "Rud");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rudNodes[i], pda);
		i++;
	}

	/* recovery reads of otherwise-untouched data */
	pda = npdas;
	for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
		INIT_DISK_NODE(rrdNodes + i, "Rrd");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rrdNodes[i], pda);
	}

	/* redundancy pdas */
	/* pqPDAs is laid out as P,Q[,P,Q] pairs; consume them in order */
	pda = pqPDAs;
	INIT_DISK_NODE(rpNodes, "Rp");
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(rpNodes[0], pda);
	pda++;
	INIT_DISK_NODE(rqNodes, redundantReadNodeName);
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(rqNodes[0], pda);
	if (nPQNodes == 2) {
		pda++;
		INIT_DISK_NODE(rpNodes + 1, "Rp");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rpNodes[1], pda);
		pda++;
		INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rqNodes[1], pda);
	}
	/* fill in recovery node params: one pda per read node (again using
	 * the contiguity of the read nodes), then raidPtr and asmap */
	for (i = 0; i < nReadNodes; i++)
		recoveryNode->params[i] = rudNodes[i].params[0];	/* pda */
	recoveryNode->params[i++].p = (void *) raidPtr;
	recoveryNode->params[i++].p = (void *) asmap;
	recoveryNode->results[0] = failedPDA;
	if (asmap->numDataFailed == 2)
		recoveryNode->results[1] = failedPDAtwo;

	/* zero fill the target data buffers? */
}
#endif				/* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */