rf_dagdegrd.c revision 1.20
1/* $NetBSD: rf_dagdegrd.c,v 1.20 2004/03/18 16:40:05 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * rf_dagdegrd.c 31 * 32 * code for creating degraded read DAGs 33 */ 34 35#include <sys/cdefs.h> 36__KERNEL_RCSID(0, "$NetBSD: rf_dagdegrd.c,v 1.20 2004/03/18 16:40:05 oster Exp $"); 37 38#include <dev/raidframe/raidframevar.h> 39 40#include "rf_archs.h" 41#include "rf_raid.h" 42#include "rf_dag.h" 43#include "rf_dagutils.h" 44#include "rf_dagfuncs.h" 45#include "rf_debugMem.h" 46#include "rf_general.h" 47#include "rf_dagdegrd.h" 48 49 50/****************************************************************************** 51 * 52 * General comments on DAG creation: 53 * 54 * All DAGs in this file use roll-away error recovery. Each DAG has a single 55 * commit node, usually called "Cmt." 
If an error occurs before the Cmt node 56 * is reached, the execution engine will halt forward execution and work 57 * backward through the graph, executing the undo functions. Assuming that 58 * each node in the graph prior to the Cmt node are undoable and atomic - or - 59 * does not make changes to permanent state, the graph will fail atomically. 60 * If an error occurs after the Cmt node executes, the engine will roll-forward 61 * through the graph, blindly executing nodes until it reaches the end. 62 * If a graph reaches the end, it is assumed to have completed successfully. 63 * 64 * A graph has only 1 Cmt node. 65 * 66 */ 67 68 69/****************************************************************************** 70 * 71 * The following wrappers map the standard DAG creation interface to the 72 * DAG creation routines. Additionally, these wrappers enable experimentation 73 * with new DAG structures by providing an extra level of indirection, allowing 74 * the DAG creation routines to be replaced at this single point. 
 */

void
rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr,
				 RF_AccessStripeMap_t *asmap,
				 RF_DagHeader_t *dag_h,
				 void *bp,
				 RF_RaidAccessFlags_t flags,
				 RF_AllocListElem_t *allocList)
{
	/* A degraded RAID-5 read is the generic degraded read with
	 * XOR-based redundancy recovery. */
	rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
	    &rf_xorRecoveryFuncs);
}


/******************************************************************************
 *
 * DAG creation code begins here
 */


/******************************************************************************
 * Create a degraded read DAG for RAID level 1
 *
 * Hdr -> Nil -> R(p/s)d -> Commit -> Trm
 *
 * The "Rd" node reads data from the surviving disk in the mirror pair
 *   Rpd - read of primary copy
 *   Rsd - read of secondary copy
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (for holding write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *****************************************************************************/

void
rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr,
				RF_AccessStripeMap_t *asmap,
				RF_DagHeader_t *dag_h,
				void *bp,
				RF_RaidAccessFlags_t flags,
				RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *rdNode, *blockNode, *commitNode, *termNode;
	RF_StripeNum_t parityStripeID;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda;
	int useMirror;

	useMirror = 0;
	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);
#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 degraded read DAG]\n");
	}
#endif
	dag_h->creator = "RaidOneDegradedReadDAG";
	/* if the primary copy of the data is dead, read the mirror
	 * (secondary) copy instead */
	if (asmap->numDataFailed == 0)
		useMirror = RF_FALSE;
	else
		useMirror = RF_TRUE;

	/* total number of nodes = 1 + (block + commit + terminator) */

	rdNode = rf_AllocDAGNode();
	rdNode->list_next = dag_h->nodes;
	dag_h->nodes = rdNode;

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	/* this dag can not commit until the commit node is reached. errors
	 * prior to the commit point imply the dag has failed and must be
	 * retried */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the block, commit, and terminator nodes */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

	pda = asmap->physInfo;
	RF_ASSERT(pda != NULL);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

	/* initialize the data node */
	if (!useMirror) {
		/* read primary copy of data */
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
		rdNode->params[0].p = pda;
		rdNode->params[1].p = pda->bufPtr;
		rdNode->params[2].v = parityStripeID;
		rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
		    which_ru);
	} else {
		/* read secondary copy of data; note that the mirror copy's
		 * PDA (asmap->parityInfo) is read into the primary PDA's
		 * buffer, i.e. straight into the user buffer */
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
		rdNode->params[0].p = asmap->parityInfo;
		rdNode->params[1].p = pda->bufPtr;
		rdNode->params[2].v = parityStripeID;
		rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
		    which_ru);
	}

	/* connect header to block node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* connect block node to rdnode */
	RF_ASSERT(blockNode->numSuccedents == 1);
	RF_ASSERT(rdNode->numAntecedents == 1);
	blockNode->succedents[0] = rdNode;
	rdNode->antecedents[0] = blockNode;
	rdNode->antType[0] = rf_control;

	/* connect rdnode to commit node */
	RF_ASSERT(rdNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	rdNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = rdNode;
	commitNode->antType[0] = rf_control;

	/* connect commit node to terminator */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antecedents[0] = commitNode;
	termNode->antType[0] = rf_control;
}



/******************************************************************************
 *
 * creates a DAG to perform a degraded-mode read of data within one stripe.
 * This DAG is as follows:
 *
 * Hdr -> Block -> Rud -> Xor -> Cmt -> T
 *              -> Rrd ->
 *              -> Rp -->
 *
 * Each R node is a successor of the L node
 * One successor arc from each R node goes to C, and the other to X
 * There is one Rud for each chunk of surviving user data requested by the
 * user, and one Rrd for each chunk of surviving user data _not_ being read by
 * the user
 * R = read, ud = user data, rd = recovery (surviving) data, p = parity
 * X = XOR, C = Commit, T = terminate
 *
 * The block node guarantees a single source node.
248 * 249 * Note: The target buffer for the XOR node is set to the actual user buffer 250 * where the failed data is supposed to end up. This buffer is zero'd by the 251 * code here. Thus, if you create a degraded read dag, use it, and then 252 * re-use, you have to be sure to zero the target buffer prior to the re-use. 253 * 254 * The recfunc argument at the end specifies the name and function used for 255 * the redundancy 256 * recovery function. 257 * 258 *****************************************************************************/ 259 260void 261rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 262 RF_DagHeader_t *dag_h, void *bp, 263 RF_RaidAccessFlags_t flags, 264 RF_AllocListElem_t *allocList, 265 const RF_RedFuncs_t *recFunc) 266{ 267 RF_DagNode_t *rudNodes, *rrdNodes, *xorNode, *blockNode; 268 RF_DagNode_t *commitNode, *rpNode, *termNode; 269 RF_DagNode_t *tmpNode, *tmprudNode, *tmprrdNode; 270 int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; 271 int j, paramNum; 272 RF_SectorCount_t sectorsPerSU; 273 RF_ReconUnitNum_t which_ru; 274 char *overlappingPDAs;/* a temporary array of flags */ 275 RF_AccessStripeMapHeader_t *new_asm_h[2]; 276 RF_PhysDiskAddr_t *pda, *parityPDA; 277 RF_StripeNum_t parityStripeID; 278 RF_PhysDiskAddr_t *failedPDA; 279 RF_RaidLayout_t *layoutPtr; 280 char *rpBuf; 281 282 layoutPtr = &(raidPtr->Layout); 283 /* failedPDA points to the pda within the asm that targets the failed 284 * disk */ 285 failedPDA = asmap->failedPDAs[0]; 286 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 287 asmap->raidAddress, &which_ru); 288 sectorsPerSU = layoutPtr->sectorsPerStripeUnit; 289 290#if RF_DEBUG_DAG 291 if (rf_dagDebug) { 292 printf("[Creating degraded read DAG]\n"); 293 } 294#endif 295 RF_ASSERT(asmap->numDataFailed == 1); 296 dag_h->creator = "DegradedReadDAG"; 297 298 /* 299 * generate two ASMs identifying the surviving data we need 300 * in order to recover the lost data 301 */ 302 303 /* overlappingPDAs 
array must be zero'd */ 304 RF_Malloc(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char), (char *)); 305 rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, 306 &rpBuf, overlappingPDAs, allocList); 307 308 /* 309 * create all the nodes at once 310 * 311 * -1 because no access is generated for the failed pda 312 */ 313 nRudNodes = asmap->numStripeUnitsAccessed - 1; 314 nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + 315 ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); 316 nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, 317 * Rrd */ 318 319 blockNode = rf_AllocDAGNode(); 320 blockNode->list_next = dag_h->nodes; 321 dag_h->nodes = blockNode; 322 323 commitNode = rf_AllocDAGNode(); 324 commitNode->list_next = dag_h->nodes; 325 dag_h->nodes = commitNode; 326 327 xorNode = rf_AllocDAGNode(); 328 xorNode->list_next = dag_h->nodes; 329 dag_h->nodes = xorNode; 330 331 rpNode = rf_AllocDAGNode(); 332 rpNode->list_next = dag_h->nodes; 333 dag_h->nodes = rpNode; 334 335 termNode = rf_AllocDAGNode(); 336 termNode->list_next = dag_h->nodes; 337 dag_h->nodes = termNode; 338 339 for (i = 0; i < nRudNodes; i++) { 340 tmpNode = rf_AllocDAGNode(); 341 tmpNode->list_next = dag_h->nodes; 342 dag_h->nodes = tmpNode; 343 } 344 rudNodes = dag_h->nodes; 345 346 for (i = 0; i < nRrdNodes; i++) { 347 tmpNode = rf_AllocDAGNode(); 348 tmpNode->list_next = dag_h->nodes; 349 dag_h->nodes = tmpNode; 350 } 351 rrdNodes = dag_h->nodes; 352 353 /* initialize nodes */ 354 dag_h->numCommitNodes = 1; 355 dag_h->numCommits = 0; 356 /* this dag can not commit until the commit node is reached errors 357 * prior to the commit point imply the dag has failed */ 358 dag_h->numSuccedents = 1; 359 360 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 361 NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); 362 rf_InitNode(commitNode, rf_wait, 
RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, 363 NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); 364 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 365 NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); 366 rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, 367 NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, 368 recFunc->SimpleName, allocList); 369 370 /* fill in the Rud nodes */ 371 tmprudNode = rudNodes; 372 for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { 373 if (pda == failedPDA) { 374 i--; 375 continue; 376 } 377 rf_InitNode(tmprudNode, rf_wait, RF_FALSE, rf_DiskReadFunc, 378 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 379 "Rud", allocList); 380 RF_ASSERT(pda); 381 tmprudNode->params[0].p = pda; 382 tmprudNode->params[1].p = pda->bufPtr; 383 tmprudNode->params[2].v = parityStripeID; 384 tmprudNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 385 tmprudNode = tmprudNode->list_next; 386 } 387 388 /* fill in the Rrd nodes */ 389 i = 0; 390 tmprrdNode = rrdNodes; 391 if (new_asm_h[0]) { 392 for (pda = new_asm_h[0]->stripeMap->physInfo; 393 i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; 394 i++, pda = pda->next) { 395 rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, 396 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 397 dag_h, "Rrd", allocList); 398 RF_ASSERT(pda); 399 tmprrdNode->params[0].p = pda; 400 tmprrdNode->params[1].p = pda->bufPtr; 401 tmprrdNode->params[2].v = parityStripeID; 402 tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 403 tmprrdNode = tmprrdNode->list_next; 404 } 405 } 406 if (new_asm_h[1]) { 407 /* tmprrdNode = rrdNodes; */ /* don't set this here -- old code was using i+j, which means 408 we need to just continue using tmprrdNode for the next 'j' elements. 
*/ 409 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; 410 j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; 411 j++, pda = pda->next) { 412 rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, 413 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 414 dag_h, "Rrd", allocList); 415 RF_ASSERT(pda); 416 tmprrdNode->params[0].p = pda; 417 tmprrdNode->params[1].p = pda->bufPtr; 418 tmprrdNode->params[2].v = parityStripeID; 419 tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 420 tmprrdNode = tmprrdNode->list_next; 421 } 422 } 423 /* make a PDA for the parity unit */ 424 RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 425 parityPDA->col = asmap->parityInfo->col; 426 parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) 427 * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); 428 parityPDA->numSector = failedPDA->numSector; 429 430 /* initialize the Rp node */ 431 rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, 432 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); 433 rpNode->params[0].p = parityPDA; 434 rpNode->params[1].p = rpBuf; 435 rpNode->params[2].v = parityStripeID; 436 rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 437 438 /* 439 * the last and nastiest step is to assign all 440 * the parameters of the Xor node 441 */ 442 paramNum = 0; 443 tmprrdNode = rrdNodes; 444 for (i = 0; i < nRrdNodes; i++) { 445 /* all the Rrd nodes need to be xored together */ 446 xorNode->params[paramNum++] = tmprrdNode->params[0]; 447 xorNode->params[paramNum++] = tmprrdNode->params[1]; 448 tmprrdNode = tmprrdNode->list_next; 449 } 450 tmprudNode = rudNodes; 451 for (i = 0; i < nRudNodes; i++) { 452 /* any Rud nodes that overlap the failed access need to be 453 * xored in */ 454 if (overlappingPDAs[i]) { 455 RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 456 memcpy((char *) 
pda, (char *) tmprudNode->params[0].p, sizeof(RF_PhysDiskAddr_t)); 457 rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); 458 xorNode->params[paramNum++].p = pda; 459 xorNode->params[paramNum++].p = pda->bufPtr; 460 } 461 tmprudNode = tmprudNode->list_next; 462 } 463 RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); 464 465 /* install parity pda as last set of params to be xor'd */ 466 xorNode->params[paramNum++].p = parityPDA; 467 xorNode->params[paramNum++].p = rpBuf; 468 469 /* 470 * the last 2 params to the recovery xor node are 471 * the failed PDA and the raidPtr 472 */ 473 xorNode->params[paramNum++].p = failedPDA; 474 xorNode->params[paramNum++].p = raidPtr; 475 RF_ASSERT(paramNum == 2 * nXorBufs + 2); 476 477 /* 478 * The xor node uses results[0] as the target buffer. 479 * Set pointer and zero the buffer. In the kernel, this 480 * may be a user buffer in which case we have to remap it. 481 */ 482 xorNode->results[0] = failedPDA->bufPtr; 483 memset(failedPDA->bufPtr, 0, rf_RaidAddressToByte(raidPtr, 484 failedPDA->numSector)); 485 486 /* connect nodes to form graph */ 487 /* connect the header to the block node */ 488 RF_ASSERT(dag_h->numSuccedents == 1); 489 RF_ASSERT(blockNode->numAntecedents == 0); 490 dag_h->succedents[0] = blockNode; 491 492 /* connect the block node to the read nodes */ 493 RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); 494 RF_ASSERT(rpNode->numAntecedents == 1); 495 blockNode->succedents[0] = rpNode; 496 rpNode->antecedents[0] = blockNode; 497 rpNode->antType[0] = rf_control; 498 tmprrdNode = rrdNodes; 499 for (i = 0; i < nRrdNodes; i++) { 500 RF_ASSERT(tmprrdNode->numSuccedents == 1); 501 blockNode->succedents[1 + i] = tmprrdNode; 502 tmprrdNode->antecedents[0] = blockNode; 503 tmprrdNode->antType[0] = rf_control; 504 tmprrdNode = tmprrdNode->list_next; 505 } 506 tmprudNode = rudNodes; 507 for (i = 0; i < nRudNodes; i++) { 508 RF_ASSERT(tmprudNode->numSuccedents == 1); 
509 blockNode->succedents[1 + nRrdNodes + i] = tmprudNode; 510 tmprudNode->antecedents[0] = blockNode; 511 tmprudNode->antType[0] = rf_control; 512 tmprudNode = tmprudNode->list_next; 513 } 514 515 /* connect the read nodes to the xor node */ 516 RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); 517 RF_ASSERT(rpNode->numSuccedents == 1); 518 rpNode->succedents[0] = xorNode; 519 xorNode->antecedents[0] = rpNode; 520 xorNode->antType[0] = rf_trueData; 521 tmprrdNode = rrdNodes; 522 for (i = 0; i < nRrdNodes; i++) { 523 RF_ASSERT(rrdNode->numSuccedents == 1); 524 tmprrdNode->succedents[0] = xorNode; 525 xorNode->antecedents[1 + i] = tmprrdNode; 526 xorNode->antType[1 + i] = rf_trueData; 527 tmprrdNode = tmprrdNode->list_next; 528 } 529 tmprudNode = rudNodes; 530 for (i = 0; i < nRudNodes; i++) { 531 RF_ASSERT(tmprudNode->numSuccedents == 1); 532 tmprudNode->succedents[0] = xorNode; 533 xorNode->antecedents[1 + nRrdNodes + i] = tmprudNode; 534 xorNode->antType[1 + nRrdNodes + i] = rf_trueData; 535 tmprudNode = tmprudNode->list_next; 536 } 537 538 /* connect the xor node to the commit node */ 539 RF_ASSERT(xorNode->numSuccedents == 1); 540 RF_ASSERT(commitNode->numAntecedents == 1); 541 xorNode->succedents[0] = commitNode; 542 commitNode->antecedents[0] = xorNode; 543 commitNode->antType[0] = rf_control; 544 545 /* connect the termNode to the commit node */ 546 RF_ASSERT(commitNode->numSuccedents == 1); 547 RF_ASSERT(termNode->numAntecedents == 1); 548 RF_ASSERT(termNode->numSuccedents == 0); 549 commitNode->succedents[0] = termNode; 550 termNode->antType[0] = rf_control; 551 termNode->antecedents[0] = commitNode; 552} 553 554#if (RF_INCLUDE_CHAINDECLUSTER > 0) 555/****************************************************************************** 556 * Create a degraded read DAG for Chained Declustering 557 * 558 * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm 559 * 560 * The "Rd" node reads data from the surviving disk in the mirror pair 561 * Rpd - read of primary 
copy
 *   Rsd - read of secondary copy
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (for holding write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *****************************************************************************/

void
rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			      RF_DagHeader_t *dag_h, void *bp,
			      RF_RaidAccessFlags_t flags,
			      RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
	RF_StripeNum_t parityStripeID;
	int useMirror, i, shiftable;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda;

	/* the read may be shifted to the other copy only when nothing in
	 * the stripe has failed */
	if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
		shiftable = RF_TRUE;
	} else {
		shiftable = RF_FALSE;
	}
	useMirror = 0;
	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);

#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID C degraded read DAG]\n");
	}
#endif
	dag_h->creator = "RaidCDegradedReadDAG";
	/* if the primary copy of the data is dead, read the secondary
	 * (mirror) copy instead */
	if (asmap->numDataFailed == 0)
		useMirror = RF_FALSE;
	else
		useMirror = RF_TRUE;

	/* total number of nodes = 1 + (block + commit + terminator) */
	RF_MallocAndAdd(nodes, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	i = 0;
	rdNode = &nodes[i];
	i++;
	blockNode = &nodes[i];
	i++;
	commitNode = &nodes[i];
	i++;
	termNode = &nodes[i];
	i++;

	/*
	 * This dag can not commit until the commit node is reached.
	 * Errors prior to the commit point imply the dag has failed
	 * and must be retried.
	 */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the block, commit, and terminator nodes */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

	pda = asmap->physInfo;
	RF_ASSERT(pda != NULL);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

	/* initialize the data node */
	if (!useMirror) {
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
		if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
			/* shift this read to the next disk in line -- i.e.
			 * read the mirror copy (asmap->parityInfo) into the
			 * user buffer for load balancing; presumably
			 * rf_compute_workload_shift decides per-access --
			 * TODO confirm against rf_chaindecluster.c */
			rdNode->params[0].p = asmap->parityInfo;
			rdNode->params[1].p = pda->bufPtr;
			rdNode->params[2].v = parityStripeID;
			rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		} else {
			/* read primary copy */
			rdNode->params[0].p = pda;
			rdNode->params[1].p = pda->bufPtr;
			rdNode->params[2].v = parityStripeID;
			rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		}
	} else {
		/* read secondary copy of data into the user buffer */
		rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
		rdNode->params[0].p = asmap->parityInfo;
		rdNode->params[1].p = pda->bufPtr;
		rdNode->params[2].v = parityStripeID;
		rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
	}

	/* connect header to block node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* connect block node to rdnode */
	RF_ASSERT(blockNode->numSuccedents == 1);
	RF_ASSERT(rdNode->numAntecedents == 1);
	blockNode->succedents[0] = rdNode;
	rdNode->antecedents[0] = blockNode;
	rdNode->antType[0] = rf_control;

	/* connect rdnode to commit node */
	RF_ASSERT(rdNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	rdNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = rdNode;
	commitNode->antType[0] = rf_control;

	/* connect commit node to terminator */
	RF_ASSERT(commitNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	commitNode->succedents[0] = termNode;
	termNode->antecedents[0] = commitNode;
	termNode->antType[0] = rf_control;
}
#endif				/* (RF_INCLUDE_CHAINDECLUSTER > 0) */

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
/*
 * XXX move this elsewhere?
 */
void
rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			       RF_PhysDiskAddr_t **pdap, int *nNodep,
			       RF_PhysDiskAddr_t **pqpdap, int *nPQNodep,
			       RF_AllocListElem_t *allocList)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	int PDAPerDisk, i;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int numDataCol = layoutPtr->numDataCol;
	int state;
	RF_SectorNum_t suoff, suend;
	unsigned firstDataCol, napdas, count;
	RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
	RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
	RF_PhysDiskAddr_t *pda_p;
	RF_PhysDiskAddr_t *phys_p;
	RF_RaidAddr_t sosAddr;

	/* determine how many pda's we will have to generate per unaccess
	 * stripe.
If there is only one failed data unit, it is one; if two, 719 * possibly two, depending wether they overlap. */ 720 721 fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); 722 fone_end = fone_start + fone->numSector; 723 724#define CONS_PDA(if,start,num) \ 725 pda_p->col = asmap->if->col; \ 726 pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ 727 pda_p->numSector = num; \ 728 pda_p->next = NULL; \ 729 RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) 730 731 if (asmap->numDataFailed == 1) { 732 PDAPerDisk = 1; 733 state = 1; 734 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 735 pda_p = *pqpdap; 736 /* build p */ 737 CONS_PDA(parityInfo, fone_start, fone->numSector); 738 pda_p->type = RF_PDA_TYPE_PARITY; 739 pda_p++; 740 /* build q */ 741 CONS_PDA(qInfo, fone_start, fone->numSector); 742 pda_p->type = RF_PDA_TYPE_Q; 743 } else { 744 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); 745 ftwo_end = ftwo_start + ftwo->numSector; 746 if (fone->numSector + ftwo->numSector > secPerSU) { 747 PDAPerDisk = 1; 748 state = 2; 749 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 750 pda_p = *pqpdap; 751 CONS_PDA(parityInfo, 0, secPerSU); 752 pda_p->type = RF_PDA_TYPE_PARITY; 753 pda_p++; 754 CONS_PDA(qInfo, 0, secPerSU); 755 pda_p->type = RF_PDA_TYPE_Q; 756 } else { 757 PDAPerDisk = 2; 758 state = 3; 759 /* four of them, fone, then ftwo */ 760 RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); 761 pda_p = *pqpdap; 762 CONS_PDA(parityInfo, fone_start, fone->numSector); 763 pda_p->type = RF_PDA_TYPE_PARITY; 764 pda_p++; 765 CONS_PDA(qInfo, fone_start, fone->numSector); 766 pda_p->type = RF_PDA_TYPE_Q; 767 pda_p++; 768 CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); 769 pda_p->type = RF_PDA_TYPE_PARITY; 770 pda_p++; 771 CONS_PDA(qInfo, ftwo_start, 
ftwo->numSector); 772 pda_p->type = RF_PDA_TYPE_Q; 773 } 774 } 775 /* figure out number of nonaccessed pda */ 776 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); 777 *nPQNodep = PDAPerDisk; 778 779 /* sweep over the over accessed pda's, figuring out the number of 780 * additional pda's to generate. Of course, skip the failed ones */ 781 782 count = 0; 783 for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { 784 if ((pda_p == fone) || (pda_p == ftwo)) 785 continue; 786 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); 787 suend = suoff + pda_p->numSector; 788 switch (state) { 789 case 1: /* one failed PDA to overlap */ 790 /* if a PDA doesn't contain the failed unit, it can 791 * only miss the start or end, not both */ 792 if ((suoff > fone_start) || (suend < fone_end)) 793 count++; 794 break; 795 case 2: /* whole stripe */ 796 if (suoff) /* leak at begining */ 797 count++; 798 if (suend < numDataCol) /* leak at end */ 799 count++; 800 break; 801 case 3: /* two disjoint units */ 802 if ((suoff > fone_start) || (suend < fone_end)) 803 count++; 804 if ((suoff > ftwo_start) || (suend < ftwo_end)) 805 count++; 806 break; 807 default: 808 RF_PANIC(); 809 } 810 } 811 812 napdas += count; 813 *nNodep = napdas; 814 if (napdas == 0) 815 return; /* short circuit */ 816 817 /* allocate up our list of pda's */ 818 819 RF_MallocAndAdd(pda_p, napdas * sizeof(RF_PhysDiskAddr_t), 820 (RF_PhysDiskAddr_t *), allocList); 821 *pdap = pda_p; 822 823 /* linkem together */ 824 for (i = 0; i < (napdas - 1); i++) 825 pda_p[i].next = pda_p + (i + 1); 826 827 /* march through the one's up to the first accessed disk */ 828 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; 829 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 830 for (i = 0; i < firstDataCol; i++) { 831 if ((pda_p - (*pdap)) == napdas) 832 continue; 833 pda_p->type = 
RF_PDA_TYPE_DATA; 834 pda_p->raidAddress = sosAddr + (i * secPerSU); 835 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 836 /* skip over dead disks */ 837 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 838 continue; 839 switch (state) { 840 case 1: /* fone */ 841 pda_p->numSector = fone->numSector; 842 pda_p->raidAddress += fone_start; 843 pda_p->startSector += fone_start; 844 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 845 break; 846 case 2: /* full stripe */ 847 pda_p->numSector = secPerSU; 848 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); 849 break; 850 case 3: /* two slabs */ 851 pda_p->numSector = fone->numSector; 852 pda_p->raidAddress += fone_start; 853 pda_p->startSector += fone_start; 854 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 855 pda_p++; 856 pda_p->type = RF_PDA_TYPE_DATA; 857 pda_p->raidAddress = sosAddr + (i * secPerSU); 858 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 859 pda_p->numSector = ftwo->numSector; 860 pda_p->raidAddress += ftwo_start; 861 pda_p->startSector += ftwo_start; 862 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 863 break; 864 default: 865 RF_PANIC(); 866 } 867 pda_p++; 868 } 869 870 /* march through the touched stripe units */ 871 for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { 872 if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) 873 continue; 874 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); 875 suend = suoff + phys_p->numSector; 876 switch (state) { 877 case 1: /* single buffer */ 878 if (suoff > fone_start) { 879 RF_ASSERT(suend >= fone_end); 880 /* The data read starts after the mapped 881 * access, snip off the begining */ 
882 pda_p->numSector = suoff - fone_start; 883 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 884 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 885 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 886 pda_p++; 887 } 888 if (suend < fone_end) { 889 RF_ASSERT(suoff <= fone_start); 890 /* The data read stops before the end of the 891 * failed access, extend */ 892 pda_p->numSector = fone_end - suend; 893 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ 894 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 895 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 896 pda_p++; 897 } 898 break; 899 case 2: /* whole stripe unit */ 900 RF_ASSERT((suoff == 0) || (suend == secPerSU)); 901 if (suend < secPerSU) { /* short read, snip from end 902 * on */ 903 pda_p->numSector = secPerSU - suend; 904 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 905 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 906 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 907 pda_p++; 908 } else 909 if (suoff > 0) { /* short at front */ 910 pda_p->numSector = suoff; 911 pda_p->raidAddress = sosAddr + (i * secPerSU); 912 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 913 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 914 pda_p++; 915 } 916 break; 917 case 3: /* two nonoverlapping failures */ 918 if ((suoff > fone_start) || (suend < fone_end)) { 919 if (suoff > fone_start) { 920 RF_ASSERT(suend >= fone_end); 921 /* The data read starts after the 922 * mapped access, snip off the 923 * begining */ 924 pda_p->numSector = suoff - fone_start; 925 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; 926 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 927 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 928 pda_p++; 929 } 930 if (suend < fone_end) { 931 RF_ASSERT(suoff <= fone_start); 932 /* The data read stops before the end 933 * of the failed access, extend */ 934 pda_p->numSector = fone_end - suend; 935 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 936 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 937 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 938 pda_p++; 939 } 940 } 941 if ((suoff > ftwo_start) || (suend < ftwo_end)) { 942 if (suoff > ftwo_start) { 943 RF_ASSERT(suend >= ftwo_end); 944 /* The data read starts after the 945 * mapped access, snip off the 946 * begining */ 947 pda_p->numSector = suoff - ftwo_start; 948 pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; 949 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 950 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 951 pda_p++; 952 } 953 if (suend < ftwo_end) { 954 RF_ASSERT(suoff <= ftwo_start); 955 /* The data read stops before the end 956 * of the failed access, extend */ 957 pda_p->numSector = ftwo_end - suend; 958 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ 959 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 960 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 961 pda_p++; 962 } 963 } 964 break; 965 default: 966 RF_PANIC(); 967 } 968 } 969 970 /* after the last accessed disk */ 971 for (; i < numDataCol; i++) { 972 if ((pda_p - (*pdap)) == napdas) 973 continue; 974 pda_p->type = RF_PDA_TYPE_DATA; 975 pda_p->raidAddress = sosAddr + (i * secPerSU); 976 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 977 /* skip over dead disks */ 978 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status)) 979 continue; 980 switch (state) { 981 case 1: /* fone */ 982 pda_p->numSector = fone->numSector; 983 pda_p->raidAddress += fone_start; 984 pda_p->startSector += fone_start; 985 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 986 break; 987 case 2: /* full stripe */ 988 pda_p->numSector = secPerSU; 989 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); 990 break; 991 case 3: /* two slabs */ 992 pda_p->numSector = fone->numSector; 993 pda_p->raidAddress += fone_start; 994 pda_p->startSector += fone_start; 995 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 996 pda_p++; 997 pda_p->type = RF_PDA_TYPE_DATA; 998 pda_p->raidAddress = sosAddr + (i * secPerSU); 999 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0); 1000 pda_p->numSector = ftwo->numSector; 1001 pda_p->raidAddress += ftwo_start; 1002 pda_p->startSector += ftwo_start; 1003 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); 1004 break; 1005 default: 1006 RF_PANIC(); 1007 } 1008 pda_p++; 1009 } 1010 1011 RF_ASSERT(pda_p - *pdap == napdas); 1012 return; 1013} 
/*
 * INIT_DISK_NODE: initialize a disk-read node in wait state with
 * 2 successors, 1 antecedent, and 4 params, then wire it into the
 * standard read-DAG shape: successor 0 is the unblock node, successor 1
 * is the recovery node, and its single (control) antecedent is the block
 * node.  NOTE: relies on dag_h, allocList, blockNode, unblockNode and
 * recoveryNode being in scope at the expansion site.
 */
#define INIT_DISK_NODE(node,name) \
rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \
(node)->succedents[0] = unblockNode; \
(node)->succedents[1] = recoveryNode; \
(node)->antecedents[0] = blockNode; \
(node)->antType[0] = rf_control

/*
 * DISK_NODE_PARAMS: fill in the 4 params of a disk-read node from a
 * physical disk address: the pda itself, its data buffer, the parity
 * stripe ID, and the priority/reconstruction-unit word.  Relies on
 * parityStripeID and which_ru being in scope at the expansion site.
 */
#define DISK_NODE_PARAMS(_node_,_p_) \
  (_node_).params[0].p = _p_ ; \
  (_node_).params[1].p = (_p_)->bufPtr; \
  (_node_).params[2].v = parityStripeID; \
  (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru)

/*
 * rf_DoubleDegRead: create a double-degraded read DAG.
 *
 * Shape of the resulting graph:
 *
 *   Hdr -> Block -> { Rud, Rrd, Rp, Rq } -> Unblock (commit) -> Term
 *                            \-> Recovery node ------------------^
 *
 * where Rud nodes read the user data on surviving disks of the accessed
 * stripe units, Rrd nodes read the additional (non-accessed) data needed
 * for reconstruction, and Rp/Rq read the redundancy units.  The recovery
 * node (whose execute function is the caller-supplied recovFunc) has every
 * read node as a trueData antecedent and regenerates the failed unit(s).
 *
 * Parameters:
 *   raidPtr               - the raid configuration
 *   asmap                 - access stripe map for this I/O; failedPDAs[0]
 *                           (and [1] if two data units failed) identify the
 *                           failed unit(s)
 *   dag_h                 - header of the DAG being constructed
 *   bp, flags             - unused here
 *   allocList             - allocation list; all memory obtained through
 *                           RF_MallocAndAdd is freed with the access
 *   redundantReadNodeName - name used for the Rq (second redundancy) nodes
 *   recoveryNodeName      - name used for the recovery node
 *   recovFunc             - do-function executed by the recovery node
 */
void
rf_DoubleDegRead(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
		 RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
		 RF_AllocListElem_t *allocList,
		 char *redundantReadNodeName, char *recoveryNodeName,
		 int (*recovFunc) (RF_DagNode_t *))
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
	       *unblockNode, *rpNodes, *rqNodes, *termNode;
	RF_PhysDiskAddr_t *pda, *pqPDAs;
	RF_PhysDiskAddr_t *npdas;
	int     nNodes, nRrdNodes, nRudNodes, i;
	RF_ReconUnitNum_t which_ru;
	int     nReadNodes, nPQNodes;
	RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
	RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
	RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);

#if RF_DEBUG_DAG
	if (rf_dagDebug)
		printf("[Creating Double Degraded Read DAG]\n");
#endif
	/* Compute the extra (non-accessed) pdas to read (npdas / nRrdNodes)
	 * and the redundancy pdas (pqPDAs / nPQNodes: P, Q, and possibly a
	 * second P,Q pair when the access spans two reconstruction units). */
	rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList);

	nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
	/* 2 * nPQNodes: one Rp and one Rq node per redundancy pair */
	nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
	nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes;

	/* Carve all nodes out of one contiguous allocation.  The read nodes
	 * (rud, rrd, rp, rq) are deliberately adjacent: later loops index
	 * across all of them as rudNodes + i for i in [0, nReadNodes). */
	RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	i = 0;
	blockNode = &nodes[i];
	i += 1;
	unblockNode = &nodes[i];
	i += 1;
	recoveryNode = &nodes[i];
	i += 1;
	termNode = &nodes[i];
	i += 1;
	rudNodes = &nodes[i];
	i += nRudNodes;
	rrdNodes = &nodes[i];
	i += nRrdNodes;
	rpNodes = &nodes[i];
	i += nPQNodes;
	rqNodes = &nodes[i];
	i += nPQNodes;
	RF_ASSERT(i == nNodes);

	dag_h->numSuccedents = 1;
	dag_h->succedents[0] = blockNode;
	dag_h->creator = "DoubleDegRead";
	dag_h->numCommits = 0;
	dag_h->numCommitNodes = 1;	/* unblock */

	/* Term has two control antecedents: the unblock node (normal path)
	 * and the recovery node. */
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
	termNode->antecedents[1] = recoveryNode;
	termNode->antType[1] = rf_control;

	/* init the block and unblock nodes */
	/* The block node has all nodes except itself, unblock and recovery as
	 * successors. Similarly for predecessors of the unblock. */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList);

	/* rudNodes + i walks the contiguous rud/rrd/rp/rq slab (see the node
	 * carving above), so this wires block/unblock to every read node. */
	for (i = 0; i < nReadNodes; i++) {
		blockNode->succedents[i] = rudNodes + i;
		unblockNode->antecedents[i] = rudNodes + i;
		unblockNode->antType[i] = rf_control;
	}
	unblockNode->succedents[0] = termNode;

	/* The recovery node has all the reads as predecessors, and the term
	 * node as successor. It gets a pda as a param from each of the read
	 * nodes plus the raidPtr. For each failed unit it has a result pda. */
	rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
	    1,			/* successors */
	    nReadNodes,		/* preds */
	    nReadNodes + 2,	/* params: one pda per read + raidPtr + asmap */
	    asmap->numDataFailed,	/* results */
	    dag_h, recoveryNodeName, allocList);

	recoveryNode->succedents[0] = termNode;
	for (i = 0; i < nReadNodes; i++) {
		recoveryNode->antecedents[i] = rudNodes + i;
		recoveryNode->antType[i] = rf_trueData;
	}

	/* build the read nodes, then come back and fill in recovery params
	 * and results */

	/* Rud nodes: the surviving (non-failed) pdas of the accessed stripe
	 * units.  i only advances when a node is actually emitted, so the
	 * failed pdas are skipped without leaving holes in rudNodes[]. */
	pda = asmap->physInfo;
	for (i = 0; i < nRudNodes; pda = pda->next) {
		if ((pda == failedPDA) || (pda == failedPDAtwo))
			continue;
		INIT_DISK_NODE(rudNodes + i, "Rud");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rudNodes[i], pda);
		i++;
	}

	/* Rrd nodes: the additional pdas generated by
	 * rf_DD_GenerateFailedAccessASMs for reconstruction. */
	pda = npdas;
	for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
		INIT_DISK_NODE(rrdNodes + i, "Rrd");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rrdNodes[i], pda);
	}

	/* redundancy pdas: pqPDAs holds nPQNodes interleaved (P,Q) pairs
	 * laid out back-to-back, consumed here with pda++ */
	pda = pqPDAs;
	INIT_DISK_NODE(rpNodes, "Rp");
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(rpNodes[0], pda);
	pda++;
	INIT_DISK_NODE(rqNodes, redundantReadNodeName);
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(rqNodes[0], pda);
	if (nPQNodes == 2) {
		pda++;
		INIT_DISK_NODE(rpNodes + 1, "Rp");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rpNodes[1], pda);
		pda++;
		INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rqNodes[1], pda);
	}
	/* fill in recovery node params: one pda per read node (again indexing
	 * across the contiguous read-node slab), then raidPtr and asmap */
	for (i = 0; i < nReadNodes; i++)
		recoveryNode->params[i] = rudNodes[i].params[0];	/* pda */
	recoveryNode->params[i++].p = (void *) raidPtr;
	recoveryNode->params[i++].p = (void *) asmap;
	recoveryNode->results[0] = failedPDA;
	if (asmap->numDataFailed == 2)
		recoveryNode->results[1] = failedPDAtwo;

	/* zero fill the target data buffers? */
}

#endif				/* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */