rf_dagdegrd.c revision 1.15
1/* $NetBSD: rf_dagdegrd.c,v 1.15 2003/12/29 03:33:47 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * rf_dagdegrd.c 31 * 32 * code for creating degraded read DAGs 33 */ 34 35#include <sys/cdefs.h> 36__KERNEL_RCSID(0, "$NetBSD: rf_dagdegrd.c,v 1.15 2003/12/29 03:33:47 oster Exp $"); 37 38#include <dev/raidframe/raidframevar.h> 39 40#include "rf_archs.h" 41#include "rf_raid.h" 42#include "rf_dag.h" 43#include "rf_dagutils.h" 44#include "rf_dagfuncs.h" 45#include "rf_debugMem.h" 46#include "rf_general.h" 47#include "rf_dagdegrd.h" 48 49 50/****************************************************************************** 51 * 52 * General comments on DAG creation: 53 * 54 * All DAGs in this file use roll-away error recovery. Each DAG has a single 55 * commit node, usually called "Cmt." 
If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
 * does not make changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll-forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */


/******************************************************************************
 *
 * The following wrappers map the standard DAG creation interface to the
 * DAG creation routines.  Additionally, these wrappers enable experimentation
 * with new DAG structures by providing an extra level of indirection, allowing
 * the DAG creation routines to be replaced at this single point.
75 */ 76 77void 78rf_CreateRaidFiveDegradedReadDAG( 79 RF_Raid_t * raidPtr, 80 RF_AccessStripeMap_t * asmap, 81 RF_DagHeader_t * dag_h, 82 void *bp, 83 RF_RaidAccessFlags_t flags, 84 RF_AllocListElem_t * allocList) 85{ 86 rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 87 &rf_xorRecoveryFuncs); 88} 89 90 91/****************************************************************************** 92 * 93 * DAG creation code begins here 94 */ 95 96 97/****************************************************************************** 98 * Create a degraded read DAG for RAID level 1 99 * 100 * Hdr -> Nil -> R(p/s)d -> Commit -> Trm 101 * 102 * The "Rd" node reads data from the surviving disk in the mirror pair 103 * Rpd - read of primary copy 104 * Rsd - read of secondary copy 105 * 106 * Parameters: raidPtr - description of the physical array 107 * asmap - logical & physical addresses for this access 108 * bp - buffer ptr (for holding write data) 109 * flags - general flags (e.g. disk locking) 110 * allocList - list of memory allocated in DAG creation 111 *****************************************************************************/ 112 113void 114rf_CreateRaidOneDegradedReadDAG( 115 RF_Raid_t * raidPtr, 116 RF_AccessStripeMap_t * asmap, 117 RF_DagHeader_t * dag_h, 118 void *bp, 119 RF_RaidAccessFlags_t flags, 120 RF_AllocListElem_t * allocList) 121{ 122 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; 123 RF_StripeNum_t parityStripeID; 124 RF_ReconUnitNum_t which_ru; 125 RF_PhysDiskAddr_t *pda; 126 int useMirror, i; 127 128 useMirror = 0; 129 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 130 asmap->raidAddress, &which_ru); 131 if (rf_dagDebug) { 132 printf("[Creating RAID level 1 degraded read DAG]\n"); 133 } 134 dag_h->creator = "RaidOneDegradedReadDAG"; 135 /* alloc the Wnd nodes and the Wmir node */ 136 if (asmap->numDataFailed == 0) 137 useMirror = RF_FALSE; 138 else 139 useMirror = RF_TRUE; 140 141 /* total number of 
nodes = 1 + (block + commit + terminator) */ 142 RF_MallocAndAdd(nodes, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 143 i = 0; 144 rdNode = &nodes[i]; 145 i++; 146 blockNode = &nodes[i]; 147 i++; 148 commitNode = &nodes[i]; 149 i++; 150 termNode = &nodes[i]; 151 i++; 152 153 /* this dag can not commit until the commit node is reached. errors 154 * prior to the commit point imply the dag has failed and must be 155 * retried */ 156 dag_h->numCommitNodes = 1; 157 dag_h->numCommits = 0; 158 dag_h->numSuccedents = 1; 159 160 /* initialize the block, commit, and terminator nodes */ 161 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 162 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 163 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 164 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 165 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 166 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 167 168 pda = asmap->physInfo; 169 RF_ASSERT(pda != NULL); 170 /* parityInfo must describe entire parity unit */ 171 RF_ASSERT(asmap->parityInfo->next == NULL); 172 173 /* initialize the data node */ 174 if (!useMirror) { 175 /* read primary copy of data */ 176 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 177 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 178 rdNode->params[0].p = pda; 179 rdNode->params[1].p = pda->bufPtr; 180 rdNode->params[2].v = parityStripeID; 181 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 182 } else { 183 /* read secondary copy of data */ 184 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 185 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); 186 rdNode->params[0].p = asmap->parityInfo; 187 rdNode->params[1].p = pda->bufPtr; 188 rdNode->params[2].v = parityStripeID; 189 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 190 
} 191 192 /* connect header to block node */ 193 RF_ASSERT(dag_h->numSuccedents == 1); 194 RF_ASSERT(blockNode->numAntecedents == 0); 195 dag_h->succedents[0] = blockNode; 196 197 /* connect block node to rdnode */ 198 RF_ASSERT(blockNode->numSuccedents == 1); 199 RF_ASSERT(rdNode->numAntecedents == 1); 200 blockNode->succedents[0] = rdNode; 201 rdNode->antecedents[0] = blockNode; 202 rdNode->antType[0] = rf_control; 203 204 /* connect rdnode to commit node */ 205 RF_ASSERT(rdNode->numSuccedents == 1); 206 RF_ASSERT(commitNode->numAntecedents == 1); 207 rdNode->succedents[0] = commitNode; 208 commitNode->antecedents[0] = rdNode; 209 commitNode->antType[0] = rf_control; 210 211 /* connect commit node to terminator */ 212 RF_ASSERT(commitNode->numSuccedents == 1); 213 RF_ASSERT(termNode->numAntecedents == 1); 214 RF_ASSERT(termNode->numSuccedents == 0); 215 commitNode->succedents[0] = termNode; 216 termNode->antecedents[0] = commitNode; 217 termNode->antType[0] = rf_control; 218} 219 220 221 222/****************************************************************************** 223 * 224 * creates a DAG to perform a degraded-mode read of data within one stripe. 225 * This DAG is as follows: 226 * 227 * Hdr -> Block -> Rud -> Xor -> Cmt -> T 228 * -> Rrd -> 229 * -> Rp --> 230 * 231 * Each R node is a successor of the L node 232 * One successor arc from each R node goes to C, and the other to X 233 * There is one Rud for each chunk of surviving user data requested by the 234 * user, and one Rrd for each chunk of surviving user data _not_ being read by 235 * the user 236 * R = read, ud = user data, rd = recovery (surviving) data, p = parity 237 * X = XOR, C = Commit, T = terminate 238 * 239 * The block node guarantees a single source node. 240 * 241 * Note: The target buffer for the XOR node is set to the actual user buffer 242 * where the failed data is supposed to end up. This buffer is zero'd by the 243 * code here. 
Thus, if you create a degraded read dag, use it, and then 244 * re-use, you have to be sure to zero the target buffer prior to the re-use. 245 * 246 * The recfunc argument at the end specifies the name and function used for 247 * the redundancy 248 * recovery function. 249 * 250 *****************************************************************************/ 251 252void 253rf_CreateDegradedReadDAG( 254 RF_Raid_t * raidPtr, 255 RF_AccessStripeMap_t * asmap, 256 RF_DagHeader_t * dag_h, 257 void *bp, 258 RF_RaidAccessFlags_t flags, 259 RF_AllocListElem_t * allocList, 260 const RF_RedFuncs_t * recFunc) 261{ 262 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; 263 RF_DagNode_t *commitNode, *rpNode, *termNode; 264 int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; 265 int j, paramNum; 266 RF_SectorCount_t sectorsPerSU; 267 RF_ReconUnitNum_t which_ru; 268 char *overlappingPDAs;/* a temporary array of flags */ 269 RF_AccessStripeMapHeader_t *new_asm_h[2]; 270 RF_PhysDiskAddr_t *pda, *parityPDA; 271 RF_StripeNum_t parityStripeID; 272 RF_PhysDiskAddr_t *failedPDA; 273 RF_RaidLayout_t *layoutPtr; 274 char *rpBuf; 275 276 layoutPtr = &(raidPtr->Layout); 277 /* failedPDA points to the pda within the asm that targets the failed 278 * disk */ 279 failedPDA = asmap->failedPDAs[0]; 280 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 281 asmap->raidAddress, &which_ru); 282 sectorsPerSU = layoutPtr->sectorsPerStripeUnit; 283 284 if (rf_dagDebug) { 285 printf("[Creating degraded read DAG]\n"); 286 } 287 RF_ASSERT(asmap->numDataFailed == 1); 288 dag_h->creator = "DegradedReadDAG"; 289 290 /* 291 * generate two ASMs identifying the surviving data we need 292 * in order to recover the lost data 293 */ 294 295 /* overlappingPDAs array must be zero'd */ 296 RF_Malloc(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char), (char *)); 297 rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, 298 &rpBuf, overlappingPDAs, allocList); 
299 300 /* 301 * create all the nodes at once 302 * 303 * -1 because no access is generated for the failed pda 304 */ 305 nRudNodes = asmap->numStripeUnitsAccessed - 1; 306 nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + 307 ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); 308 nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, 309 * Rrd */ 310 RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), 311 allocList); 312 i = 0; 313 blockNode = &nodes[i]; 314 i++; 315 commitNode = &nodes[i]; 316 i++; 317 xorNode = &nodes[i]; 318 i++; 319 rpNode = &nodes[i]; 320 i++; 321 termNode = &nodes[i]; 322 i++; 323 rudNodes = &nodes[i]; 324 i += nRudNodes; 325 rrdNodes = &nodes[i]; 326 i += nRrdNodes; 327 RF_ASSERT(i == nNodes); 328 329 /* initialize nodes */ 330 dag_h->numCommitNodes = 1; 331 dag_h->numCommits = 0; 332 /* this dag can not commit until the commit node is reached errors 333 * prior to the commit point imply the dag has failed */ 334 dag_h->numSuccedents = 1; 335 336 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 337 NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); 338 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 339 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 340 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 341 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 342 rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, 343 NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, 344 recFunc->SimpleName, allocList); 345 346 /* fill in the Rud nodes */ 347 for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { 348 if (pda == failedPDA) { 349 i--; 350 continue; 351 } 352 rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, 353 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 354 "Rud", 
allocList); 355 RF_ASSERT(pda); 356 rudNodes[i].params[0].p = pda; 357 rudNodes[i].params[1].p = pda->bufPtr; 358 rudNodes[i].params[2].v = parityStripeID; 359 rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 360 } 361 362 /* fill in the Rrd nodes */ 363 i = 0; 364 if (new_asm_h[0]) { 365 for (pda = new_asm_h[0]->stripeMap->physInfo; 366 i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; 367 i++, pda = pda->next) { 368 rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, 369 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 370 dag_h, "Rrd", allocList); 371 RF_ASSERT(pda); 372 rrdNodes[i].params[0].p = pda; 373 rrdNodes[i].params[1].p = pda->bufPtr; 374 rrdNodes[i].params[2].v = parityStripeID; 375 rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 376 } 377 } 378 if (new_asm_h[1]) { 379 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; 380 j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; 381 j++, pda = pda->next) { 382 rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, 383 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 384 dag_h, "Rrd", allocList); 385 RF_ASSERT(pda); 386 rrdNodes[i + j].params[0].p = pda; 387 rrdNodes[i + j].params[1].p = pda->bufPtr; 388 rrdNodes[i + j].params[2].v = parityStripeID; 389 rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 390 } 391 } 392 /* make a PDA for the parity unit */ 393 RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 394 parityPDA->col = asmap->parityInfo->col; 395 parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) 396 * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); 397 parityPDA->numSector = failedPDA->numSector; 398 399 /* initialize the Rp node */ 400 rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 401 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); 402 
rpNode->params[0].p = parityPDA; 403 rpNode->params[1].p = rpBuf; 404 rpNode->params[2].v = parityStripeID; 405 rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 406 407 /* 408 * the last and nastiest step is to assign all 409 * the parameters of the Xor node 410 */ 411 paramNum = 0; 412 for (i = 0; i < nRrdNodes; i++) { 413 /* all the Rrd nodes need to be xored together */ 414 xorNode->params[paramNum++] = rrdNodes[i].params[0]; 415 xorNode->params[paramNum++] = rrdNodes[i].params[1]; 416 } 417 for (i = 0; i < nRudNodes; i++) { 418 /* any Rud nodes that overlap the failed access need to be 419 * xored in */ 420 if (overlappingPDAs[i]) { 421 RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 422 memcpy((char *) pda, (char *) rudNodes[i].params[0].p, sizeof(RF_PhysDiskAddr_t)); 423 rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); 424 xorNode->params[paramNum++].p = pda; 425 xorNode->params[paramNum++].p = pda->bufPtr; 426 } 427 } 428 RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); 429 430 /* install parity pda as last set of params to be xor'd */ 431 xorNode->params[paramNum++].p = parityPDA; 432 xorNode->params[paramNum++].p = rpBuf; 433 434 /* 435 * the last 2 params to the recovery xor node are 436 * the failed PDA and the raidPtr 437 */ 438 xorNode->params[paramNum++].p = failedPDA; 439 xorNode->params[paramNum++].p = raidPtr; 440 RF_ASSERT(paramNum == 2 * nXorBufs + 2); 441 442 /* 443 * The xor node uses results[0] as the target buffer. 444 * Set pointer and zero the buffer. In the kernel, this 445 * may be a user buffer in which case we have to remap it. 
446 */ 447 xorNode->results[0] = failedPDA->bufPtr; 448 RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, 449 failedPDA->numSector)); 450 451 /* connect nodes to form graph */ 452 /* connect the header to the block node */ 453 RF_ASSERT(dag_h->numSuccedents == 1); 454 RF_ASSERT(blockNode->numAntecedents == 0); 455 dag_h->succedents[0] = blockNode; 456 457 /* connect the block node to the read nodes */ 458 RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); 459 RF_ASSERT(rpNode->numAntecedents == 1); 460 blockNode->succedents[0] = rpNode; 461 rpNode->antecedents[0] = blockNode; 462 rpNode->antType[0] = rf_control; 463 for (i = 0; i < nRrdNodes; i++) { 464 RF_ASSERT(rrdNodes[i].numSuccedents == 1); 465 blockNode->succedents[1 + i] = &rrdNodes[i]; 466 rrdNodes[i].antecedents[0] = blockNode; 467 rrdNodes[i].antType[0] = rf_control; 468 } 469 for (i = 0; i < nRudNodes; i++) { 470 RF_ASSERT(rudNodes[i].numSuccedents == 1); 471 blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; 472 rudNodes[i].antecedents[0] = blockNode; 473 rudNodes[i].antType[0] = rf_control; 474 } 475 476 /* connect the read nodes to the xor node */ 477 RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); 478 RF_ASSERT(rpNode->numSuccedents == 1); 479 rpNode->succedents[0] = xorNode; 480 xorNode->antecedents[0] = rpNode; 481 xorNode->antType[0] = rf_trueData; 482 for (i = 0; i < nRrdNodes; i++) { 483 RF_ASSERT(rrdNodes[i].numSuccedents == 1); 484 rrdNodes[i].succedents[0] = xorNode; 485 xorNode->antecedents[1 + i] = &rrdNodes[i]; 486 xorNode->antType[1 + i] = rf_trueData; 487 } 488 for (i = 0; i < nRudNodes; i++) { 489 RF_ASSERT(rudNodes[i].numSuccedents == 1); 490 rudNodes[i].succedents[0] = xorNode; 491 xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; 492 xorNode->antType[1 + nRrdNodes + i] = rf_trueData; 493 } 494 495 /* connect the xor node to the commit node */ 496 RF_ASSERT(xorNode->numSuccedents == 1); 497 
RF_ASSERT(commitNode->numAntecedents == 1); 498 xorNode->succedents[0] = commitNode; 499 commitNode->antecedents[0] = xorNode; 500 commitNode->antType[0] = rf_control; 501 502 /* connect the termNode to the commit node */ 503 RF_ASSERT(commitNode->numSuccedents == 1); 504 RF_ASSERT(termNode->numAntecedents == 1); 505 RF_ASSERT(termNode->numSuccedents == 0); 506 commitNode->succedents[0] = termNode; 507 termNode->antType[0] = rf_control; 508 termNode->antecedents[0] = commitNode; 509} 510 511#if (RF_INCLUDE_CHAINDECLUSTER > 0) 512/****************************************************************************** 513 * Create a degraded read DAG for Chained Declustering 514 * 515 * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm 516 * 517 * The "Rd" node reads data from the surviving disk in the mirror pair 518 * Rpd - read of primary copy 519 * Rsd - read of secondary copy 520 * 521 * Parameters: raidPtr - description of the physical array 522 * asmap - logical & physical addresses for this access 523 * bp - buffer ptr (for holding write data) 524 * flags - general flags (e.g. 
disk locking) 525 * allocList - list of memory allocated in DAG creation 526 *****************************************************************************/ 527 528void 529rf_CreateRaidCDegradedReadDAG( 530 RF_Raid_t * raidPtr, 531 RF_AccessStripeMap_t * asmap, 532 RF_DagHeader_t * dag_h, 533 void *bp, 534 RF_RaidAccessFlags_t flags, 535 RF_AllocListElem_t * allocList) 536{ 537 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; 538 RF_StripeNum_t parityStripeID; 539 int useMirror, i, shiftable; 540 RF_ReconUnitNum_t which_ru; 541 RF_PhysDiskAddr_t *pda; 542 543 if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { 544 shiftable = RF_TRUE; 545 } else { 546 shiftable = RF_FALSE; 547 } 548 useMirror = 0; 549 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 550 asmap->raidAddress, &which_ru); 551 552 if (rf_dagDebug) { 553 printf("[Creating RAID C degraded read DAG]\n"); 554 } 555 dag_h->creator = "RaidCDegradedReadDAG"; 556 /* alloc the Wnd nodes and the Wmir node */ 557 if (asmap->numDataFailed == 0) 558 useMirror = RF_FALSE; 559 else 560 useMirror = RF_TRUE; 561 562 /* total number of nodes = 1 + (block + commit + terminator) */ 563 RF_MallocAndAdd(nodes, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 564 i = 0; 565 rdNode = &nodes[i]; 566 i++; 567 blockNode = &nodes[i]; 568 i++; 569 commitNode = &nodes[i]; 570 i++; 571 termNode = &nodes[i]; 572 i++; 573 574 /* 575 * This dag can not commit until the commit node is reached. 576 * Errors prior to the commit point imply the dag has failed 577 * and must be retried. 
578 */ 579 dag_h->numCommitNodes = 1; 580 dag_h->numCommits = 0; 581 dag_h->numSuccedents = 1; 582 583 /* initialize the block, commit, and terminator nodes */ 584 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 585 NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); 586 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 587 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 588 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 589 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 590 591 pda = asmap->physInfo; 592 RF_ASSERT(pda != NULL); 593 /* parityInfo must describe entire parity unit */ 594 RF_ASSERT(asmap->parityInfo->next == NULL); 595 596 /* initialize the data node */ 597 if (!useMirror) { 598 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 599 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); 600 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { 601 /* shift this read to the next disk in line */ 602 rdNode->params[0].p = asmap->parityInfo; 603 rdNode->params[1].p = pda->bufPtr; 604 rdNode->params[2].v = parityStripeID; 605 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 606 } else { 607 /* read primary copy */ 608 rdNode->params[0].p = pda; 609 rdNode->params[1].p = pda->bufPtr; 610 rdNode->params[2].v = parityStripeID; 611 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 612 } 613 } else { 614 /* read secondary copy of data */ 615 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 616 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); 617 rdNode->params[0].p = asmap->parityInfo; 618 rdNode->params[1].p = pda->bufPtr; 619 rdNode->params[2].v = parityStripeID; 620 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 621 } 622 623 /* connect header to block node */ 624 RF_ASSERT(dag_h->numSuccedents == 1); 625 
RF_ASSERT(blockNode->numAntecedents == 0); 626 dag_h->succedents[0] = blockNode; 627 628 /* connect block node to rdnode */ 629 RF_ASSERT(blockNode->numSuccedents == 1); 630 RF_ASSERT(rdNode->numAntecedents == 1); 631 blockNode->succedents[0] = rdNode; 632 rdNode->antecedents[0] = blockNode; 633 rdNode->antType[0] = rf_control; 634 635 /* connect rdnode to commit node */ 636 RF_ASSERT(rdNode->numSuccedents == 1); 637 RF_ASSERT(commitNode->numAntecedents == 1); 638 rdNode->succedents[0] = commitNode; 639 commitNode->antecedents[0] = rdNode; 640 commitNode->antType[0] = rf_control; 641 642 /* connect commit node to terminator */ 643 RF_ASSERT(commitNode->numSuccedents == 1); 644 RF_ASSERT(termNode->numAntecedents == 1); 645 RF_ASSERT(termNode->numSuccedents == 0); 646 commitNode->succedents[0] = termNode; 647 termNode->antecedents[0] = commitNode; 648 termNode->antType[0] = rf_control; 649} 650#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ 651 652#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) 653/* 654 * XXX move this elsewhere? 655 */ 656void 657rf_DD_GenerateFailedAccessASMs( 658 RF_Raid_t * raidPtr, 659 RF_AccessStripeMap_t * asmap, 660 RF_PhysDiskAddr_t ** pdap, 661 int *nNodep, 662 RF_PhysDiskAddr_t ** pqpdap, 663 int *nPQNodep, 664 RF_AllocListElem_t * allocList) 665{ 666 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 667 int PDAPerDisk, i; 668 RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; 669 int numDataCol = layoutPtr->numDataCol; 670 int state; 671 RF_SectorNum_t suoff, suend; 672 unsigned firstDataCol, napdas, count; 673 RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; 674 RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; 675 RF_PhysDiskAddr_t *pda_p; 676 RF_PhysDiskAddr_t *phys_p; 677 RF_RaidAddr_t sosAddr; 678 679 /* determine how many pda's we will have to generate per unaccess 680 * stripe. 
If there is only one failed data unit, it is one; if two, 681 * possibly two, depending wether they overlap. */ 682 683 fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); 684 fone_end = fone_start + fone->numSector; 685 686#define CONS_PDA(if,start,num) \ 687 pda_p->col = asmap->if->col; \ 688 pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ 689 pda_p->numSector = num; \ 690 pda_p->next = NULL; \ 691 RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) 692 693 if (asmap->numDataFailed == 1) { 694 PDAPerDisk = 1; 695 state = 1; 696 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 697 pda_p = *pqpdap; 698 /* build p */ 699 CONS_PDA(parityInfo, fone_start, fone->numSector); 700 pda_p->type = RF_PDA_TYPE_PARITY; 701 pda_p++; 702 /* build q */ 703 CONS_PDA(qInfo, fone_start, fone->numSector); 704 pda_p->type = RF_PDA_TYPE_Q; 705 } else { 706 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); 707 ftwo_end = ftwo_start + ftwo->numSector; 708 if (fone->numSector + ftwo->numSector > secPerSU) { 709 PDAPerDisk = 1; 710 state = 2; 711 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 712 pda_p = *pqpdap; 713 CONS_PDA(parityInfo, 0, secPerSU); 714 pda_p->type = RF_PDA_TYPE_PARITY; 715 pda_p++; 716 CONS_PDA(qInfo, 0, secPerSU); 717 pda_p->type = RF_PDA_TYPE_Q; 718 } else { 719 PDAPerDisk = 2; 720 state = 3; 721 /* four of them, fone, then ftwo */ 722 RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 723 pda_p = *pqpdap; 724 CONS_PDA(parityInfo, fone_start, fone->numSector); 725 pda_p->type = RF_PDA_TYPE_PARITY; 726 pda_p++; 727 CONS_PDA(qInfo, fone_start, fone->numSector); 728 pda_p->type = RF_PDA_TYPE_Q; 729 pda_p++; 730 CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); 731 pda_p->type = RF_PDA_TYPE_PARITY; 732 pda_p++; 733 CONS_PDA(qInfo, ftwo_start, 
ftwo->numSector); 734 pda_p->type = RF_PDA_TYPE_Q; 735 } 736 } 737 /* figure out number of nonaccessed pda */ 738 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); 739 *nPQNodep = PDAPerDisk; 740 741 /* sweep over the over accessed pda's, figuring out the number of 742 * additional pda's to generate. Of course, skip the failed ones */ 743 744 count = 0; 745 for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { 746 if ((pda_p == fone) || (pda_p == ftwo)) 747 continue; 748 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); 749 suend = suoff + pda_p->numSector; 750 switch (state) { 751 case 1: /* one failed PDA to overlap */ 752 /* if a PDA doesn't contain the failed unit, it can 753 * only miss the start or end, not both */ 754 if ((suoff > fone_start) || (suend < fone_end)) 755 count++; 756 break; 757 case 2: /* whole stripe */ 758 if (suoff) /* leak at begining */ 759 count++; 760 if (suend < numDataCol) /* leak at end */ 761 count++; 762 break; 763 case 3: /* two disjoint units */ 764 if ((suoff > fone_start) || (suend < fone_end)) 765 count++; 766 if ((suoff > ftwo_start) || (suend < ftwo_end)) 767 count++; 768 break; 769 default: 770 RF_PANIC(); 771 } 772 } 773 774 napdas += count; 775 *nNodep = napdas; 776 if (napdas == 0) 777 return; /* short circuit */ 778 779 /* allocate up our list of pda's */ 780 781 RF_MallocAndAdd(pda_p, napdas * sizeof(RF_PhysDiskAddr_t), 782 (RF_PhysDiskAddr_t *), allocList); 783 *pdap = pda_p; 784 785 /* linkem together */ 786 for (i = 0; i < (napdas - 1); i++) 787 pda_p[i].next = pda_p + (i + 1); 788 789 /* march through the one's up to the first accessed disk */ 790 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; 791 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 792 for (i = 0; i < firstDataCol; i++) { 793 if ((pda_p - (*pdap)) == napdas) 794 continue; 795 pda_p->type = 
RF_PDA_TYPE_DATA; 796 pda_p->raidAddress = sosAddr + (i * secPerSU); 797 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 798 /* skip over dead disks */ 799 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 800 continue; 801 switch (state) { 802 case 1: /* fone */ 803 pda_p->numSector = fone->numSector; 804 pda_p->raidAddress += fone_start; 805 pda_p->startSector += fone_start; 806 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 807 break; 808 case 2: /* full stripe */ 809 pda_p->numSector = secPerSU; 810 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); 811 break; 812 case 3: /* two slabs */ 813 pda_p->numSector = fone->numSector; 814 pda_p->raidAddress += fone_start; 815 pda_p->startSector += fone_start; 816 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 817 pda_p++; 818 pda_p->type = RF_PDA_TYPE_DATA; 819 pda_p->raidAddress = sosAddr + (i * secPerSU); 820 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 821 pda_p->numSector = ftwo->numSector; 822 pda_p->raidAddress += ftwo_start; 823 pda_p->startSector += ftwo_start; 824 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 825 break; 826 default: 827 RF_PANIC(); 828 } 829 pda_p++; 830 } 831 832 /* march through the touched stripe units */ 833 for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { 834 if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) 835 continue; 836 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); 837 suend = suoff + phys_p->numSector; 838 switch (state) { 839 case 1: /* single buffer */ 840 if (suoff > fone_start) { 841 RF_ASSERT(suend >= fone_end); 842 /* The data read starts after the mapped 843 * access, snip off the begining */ 
844 pda_p->numSector = suoff - fone_start; 845 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 846 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 847 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 848 pda_p++; 849 } 850 if (suend < fone_end) { 851 RF_ASSERT(suoff <= fone_start); 852 /* The data read stops before the end of the 853 * failed access, extend */ 854 pda_p->numSector = fone_end - suend; 855 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ 856 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 857 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 858 pda_p++; 859 } 860 break; 861 case 2: /* whole stripe unit */ 862 RF_ASSERT((suoff == 0) || (suend == secPerSU)); 863 if (suend < secPerSU) { /* short read, snip from end 864 * on */ 865 pda_p->numSector = secPerSU - suend; 866 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 867 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 868 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 869 pda_p++; 870 } else 871 if (suoff > 0) { /* short at front */ 872 pda_p->numSector = suoff; 873 pda_p->raidAddress = sosAddr + (i * secPerSU); 874 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 875 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 876 pda_p++; 877 } 878 break; 879 case 3: /* two nonoverlapping failures */ 880 if ((suoff > fone_start) || (suend < fone_end)) { 881 if (suoff > fone_start) { 882 RF_ASSERT(suend >= fone_end); 883 /* The data read starts after the 884 * mapped access, snip off the 885 * begining */ 886 pda_p->numSector = suoff - fone_start; 887 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 888 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 889 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 890 pda_p++; 891 } 892 if (suend < fone_end) { 893 RF_ASSERT(suoff <= fone_start); 894 /* The data read stops before the end 895 * of the failed access, extend */ 896 pda_p->numSector = fone_end - suend; 897 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 898 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 899 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 900 pda_p++; 901 } 902 } 903 if ((suoff > ftwo_start) || (suend < ftwo_end)) { 904 if (suoff > ftwo_start) { 905 RF_ASSERT(suend >= ftwo_end); 906 /* The data read starts after the 907 * mapped access, snip off the 908 * begining */ 909 pda_p->numSector = suoff - ftwo_start; 910 pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; 911 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 912 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 913 pda_p++; 914 } 915 if (suend < ftwo_end) { 916 RF_ASSERT(suoff <= ftwo_start); 917 /* The data read stops before the end 918 * of the failed access, extend */ 919 pda_p->numSector = ftwo_end - suend; 920 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 921 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 922 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 923 pda_p++; 924 } 925 } 926 break; 927 default: 928 RF_PANIC(); 929 } 930 } 931 932 /* after the last accessed disk */ 933 for (; i < numDataCol; i++) { 934 if ((pda_p - (*pdap)) == napdas) 935 continue; 936 pda_p->type = RF_PDA_TYPE_DATA; 937 pda_p->raidAddress = sosAddr + (i * secPerSU); 938 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 939 /* skip over dead disks */ 940 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 941 continue; 942 switch (state) { 943 case 1: /* fone */ 944 pda_p->numSector = fone->numSector; 945 pda_p->raidAddress += fone_start; 946 pda_p->startSector += fone_start; 947 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 948 break; 949 case 2: /* full stripe */ 950 pda_p->numSector = secPerSU; 951 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); 952 break; 953 case 3: /* two slabs */ 954 pda_p->numSector = fone->numSector; 955 pda_p->raidAddress += fone_start; 956 pda_p->startSector += fone_start; 957 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 958 pda_p++; 959 pda_p->type = RF_PDA_TYPE_DATA; 960 pda_p->raidAddress = sosAddr + (i * secPerSU); 961 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 962 pda_p->numSector = ftwo->numSector; 963 pda_p->raidAddress += ftwo_start; 964 pda_p->startSector += ftwo_start; 965 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 966 break; 967 default: 968 RF_PANIC(); 969 } 970 pda_p++; 971 } 972 973 RF_ASSERT(pda_p - *pdap == napdas); 974 return; 975} 976#define 
INIT_DISK_NODE(node,name) \ 977rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ 978(node)->succedents[0] = unblockNode; \ 979(node)->succedents[1] = recoveryNode; \ 980(node)->antecedents[0] = blockNode; \ 981(node)->antType[0] = rf_control 982 983#define DISK_NODE_PARAMS(_node_,_p_) \ 984 (_node_).params[0].p = _p_ ; \ 985 (_node_).params[1].p = (_p_)->bufPtr; \ 986 (_node_).params[2].v = parityStripeID; \ 987 (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) 988 989void 990rf_DoubleDegRead( 991 RF_Raid_t * raidPtr, 992 RF_AccessStripeMap_t * asmap, 993 RF_DagHeader_t * dag_h, 994 void *bp, 995 RF_RaidAccessFlags_t flags, 996 RF_AllocListElem_t * allocList, 997 char *redundantReadNodeName, 998 char *recoveryNodeName, 999 int (*recovFunc) (RF_DagNode_t *)) 1000{ 1001 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 1002 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, 1003 *unblockNode, *rpNodes, *rqNodes, *termNode; 1004 RF_PhysDiskAddr_t *pda, *pqPDAs; 1005 RF_PhysDiskAddr_t *npdas; 1006 int nNodes, nRrdNodes, nRudNodes, i; 1007 RF_ReconUnitNum_t which_ru; 1008 int nReadNodes, nPQNodes; 1009 RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; 1010 RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; 1011 RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); 1012 1013 if (rf_dagDebug) 1014 printf("[Creating Double Degraded Read DAG]\n"); 1015 rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); 1016 1017 nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); 1018 nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; 1019 nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; 1020 1021 RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); 1022 i = 0; 1023 blockNode = &nodes[i]; 1024 i += 
1; 1025 unblockNode = &nodes[i]; 1026 i += 1; 1027 recoveryNode = &nodes[i]; 1028 i += 1; 1029 termNode = &nodes[i]; 1030 i += 1; 1031 rudNodes = &nodes[i]; 1032 i += nRudNodes; 1033 rrdNodes = &nodes[i]; 1034 i += nRrdNodes; 1035 rpNodes = &nodes[i]; 1036 i += nPQNodes; 1037 rqNodes = &nodes[i]; 1038 i += nPQNodes; 1039 RF_ASSERT(i == nNodes); 1040 1041 dag_h->numSuccedents = 1; 1042 dag_h->succedents[0] = blockNode; 1043 dag_h->creator = "DoubleDegRead"; 1044 dag_h->numCommits = 0; 1045 dag_h->numCommitNodes = 1; /* unblock */ 1046 1047 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); 1048 termNode->antecedents[0] = unblockNode; 1049 termNode->antType[0] = rf_control; 1050 termNode->antecedents[1] = recoveryNode; 1051 termNode->antType[1] = rf_control; 1052 1053 /* init the block and unblock nodes */ 1054 /* The block node has all nodes except itself, unblock and recovery as 1055 * successors. Similarly for predecessors of the unblock. */ 1056 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); 1057 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); 1058 1059 for (i = 0; i < nReadNodes; i++) { 1060 blockNode->succedents[i] = rudNodes + i; 1061 unblockNode->antecedents[i] = rudNodes + i; 1062 unblockNode->antType[i] = rf_control; 1063 } 1064 unblockNode->succedents[0] = termNode; 1065 1066 /* The recovery node has all the reads as predecessors, and the term 1067 * node as successors. It gets a pda as a param from each of the read 1068 * nodes plus the raidPtr. For each failed unit is has a result pda. 
*/ 1069 rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, 1070 1, /* succesors */ 1071 nReadNodes, /* preds */ 1072 nReadNodes + 2, /* params */ 1073 asmap->numDataFailed, /* results */ 1074 dag_h, recoveryNodeName, allocList); 1075 1076 recoveryNode->succedents[0] = termNode; 1077 for (i = 0; i < nReadNodes; i++) { 1078 recoveryNode->antecedents[i] = rudNodes + i; 1079 recoveryNode->antType[i] = rf_trueData; 1080 } 1081 1082 /* build the read nodes, then come back and fill in recovery params 1083 * and results */ 1084 pda = asmap->physInfo; 1085 for (i = 0; i < nRudNodes; pda = pda->next) { 1086 if ((pda == failedPDA) || (pda == failedPDAtwo)) 1087 continue; 1088 INIT_DISK_NODE(rudNodes + i, "Rud"); 1089 RF_ASSERT(pda); 1090 DISK_NODE_PARAMS(rudNodes[i], pda); 1091 i++; 1092 } 1093 1094 pda = npdas; 1095 for (i = 0; i < nRrdNodes; i++, pda = pda->next) { 1096 INIT_DISK_NODE(rrdNodes + i, "Rrd"); 1097 RF_ASSERT(pda); 1098 DISK_NODE_PARAMS(rrdNodes[i], pda); 1099 } 1100 1101 /* redundancy pdas */ 1102 pda = pqPDAs; 1103 INIT_DISK_NODE(rpNodes, "Rp"); 1104 RF_ASSERT(pda); 1105 DISK_NODE_PARAMS(rpNodes[0], pda); 1106 pda++; 1107 INIT_DISK_NODE(rqNodes, redundantReadNodeName); 1108 RF_ASSERT(pda); 1109 DISK_NODE_PARAMS(rqNodes[0], pda); 1110 if (nPQNodes == 2) { 1111 pda++; 1112 INIT_DISK_NODE(rpNodes + 1, "Rp"); 1113 RF_ASSERT(pda); 1114 DISK_NODE_PARAMS(rpNodes[1], pda); 1115 pda++; 1116 INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName); 1117 RF_ASSERT(pda); 1118 DISK_NODE_PARAMS(rqNodes[1], pda); 1119 } 1120 /* fill in recovery node params */ 1121 for (i = 0; i < nReadNodes; i++) 1122 recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ 1123 recoveryNode->params[i++].p = (void *) raidPtr; 1124 recoveryNode->params[i++].p = (void *) asmap; 1125 recoveryNode->results[0] = failedPDA; 1126 if (asmap->numDataFailed == 2) 1127 recoveryNode->results[1] = failedPDAtwo; 1128 1129 /* zero fill the target data buffers? 
*/ 1130} 1131 1132#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ 1133