/* rf_dagffwr.c revision 1.22 */
1/* $NetBSD: rf_dagffwr.c,v 1.22 2004/03/18 16:40:05 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * rf_dagff.c 31 * 32 * code for creating fault-free DAGs 33 * 34 */ 35 36#include <sys/cdefs.h> 37__KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.22 2004/03/18 16:40:05 oster Exp $"); 38 39#include <dev/raidframe/raidframevar.h> 40 41#include "rf_raid.h" 42#include "rf_dag.h" 43#include "rf_dagutils.h" 44#include "rf_dagfuncs.h" 45#include "rf_debugMem.h" 46#include "rf_dagffrd.h" 47#include "rf_general.h" 48#include "rf_dagffwr.h" 49 50/****************************************************************************** 51 * 52 * General comments on DAG creation: 53 * 54 * All DAGs in this file use roll-away error recovery. Each DAG has a single 55 * commit node, usually called "Cmt." 
If an error occurs before the Cmt node 56 * is reached, the execution engine will halt forward execution and work 57 * backward through the graph, executing the undo functions. Assuming that 58 * each node in the graph prior to the Cmt node are undoable and atomic - or - 59 * does not make changes to permanent state, the graph will fail atomically. 60 * If an error occurs after the Cmt node executes, the engine will roll-forward 61 * through the graph, blindly executing nodes until it reaches the end. 62 * If a graph reaches the end, it is assumed to have completed successfully. 63 * 64 * A graph has only 1 Cmt node. 65 * 66 */ 67 68 69/****************************************************************************** 70 * 71 * The following wrappers map the standard DAG creation interface to the 72 * DAG creation routines. Additionally, these wrappers enable experimentation 73 * with new DAG structures by providing an extra level of indirection, allowing 74 * the DAG creation routines to be replaced at this single point. 
75 */ 76 77 78void 79rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 80 RF_DagHeader_t *dag_h, void *bp, 81 RF_RaidAccessFlags_t flags, 82 RF_AllocListElem_t *allocList, 83 RF_IoType_t type) 84{ 85 rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 86 RF_IO_TYPE_WRITE); 87} 88 89void 90rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 91 RF_DagHeader_t *dag_h, void *bp, 92 RF_RaidAccessFlags_t flags, 93 RF_AllocListElem_t *allocList, 94 RF_IoType_t type) 95{ 96 rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 97 RF_IO_TYPE_WRITE); 98} 99 100void 101rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 102 RF_DagHeader_t *dag_h, void *bp, 103 RF_RaidAccessFlags_t flags, 104 RF_AllocListElem_t *allocList) 105{ 106 /* "normal" rollaway */ 107 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, 108 allocList, &rf_xorFuncs, NULL); 109} 110 111void 112rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 113 RF_DagHeader_t *dag_h, void *bp, 114 RF_RaidAccessFlags_t flags, 115 RF_AllocListElem_t *allocList) 116{ 117 /* "normal" rollaway */ 118 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, 119 allocList, 1, rf_RegularXorFunc, RF_TRUE); 120} 121 122 123/****************************************************************************** 124 * 125 * DAG creation code begins here 126 */ 127 128 129/****************************************************************************** 130 * 131 * creates a DAG to perform a large-write operation: 132 * 133 * / Rod \ / Wnd \ 134 * H -- block- Rod - Xor - Cmt - Wnd --- T 135 * \ Rod / \ Wnp / 136 * \[Wnq]/ 137 * 138 * The XOR node also does the Q calculation in the P+Q architecture. 139 * All nodes are before the commit node (Cmt) are assumed to be atomic and 140 * undoable - or - they make no changes to permanent state. 
141 * 142 * Rod = read old data 143 * Cmt = commit node 144 * Wnp = write new parity 145 * Wnd = write new data 146 * Wnq = write new "q" 147 * [] denotes optional segments in the graph 148 * 149 * Parameters: raidPtr - description of the physical array 150 * asmap - logical & physical addresses for this access 151 * bp - buffer ptr (holds write data) 152 * flags - general flags (e.g. disk locking) 153 * allocList - list of memory allocated in DAG creation 154 * nfaults - number of faults array can tolerate 155 * (equal to # redundancy units in stripe) 156 * redfuncs - list of redundancy generating functions 157 * 158 *****************************************************************************/ 159 160void 161rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 162 RF_DagHeader_t *dag_h, void *bp, 163 RF_RaidAccessFlags_t flags, 164 RF_AllocListElem_t *allocList, 165 int nfaults, int (*redFunc) (RF_DagNode_t *), 166 int allowBufferRecycle) 167{ 168 RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode; 169 RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode; 170 int nWndNodes, nRodNodes, i, nodeNum, asmNum; 171 RF_AccessStripeMapHeader_t *new_asm_h[2]; 172 RF_StripeNum_t parityStripeID; 173 char *sosBuffer, *eosBuffer; 174 RF_ReconUnitNum_t which_ru; 175 RF_RaidLayout_t *layoutPtr; 176 RF_PhysDiskAddr_t *pda; 177 178 layoutPtr = &(raidPtr->Layout); 179 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 180 asmap->raidAddress, 181 &which_ru); 182 183#if RF_DEBUG_DAG 184 if (rf_dagDebug) { 185 printf("[Creating large-write DAG]\n"); 186 } 187#endif 188 dag_h->creator = "LargeWriteDAG"; 189 190 dag_h->numCommitNodes = 1; 191 dag_h->numCommits = 0; 192 dag_h->numSuccedents = 1; 193 194 /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ 195 nWndNodes = asmap->numStripeUnitsAccessed; 196 197 for (i = 0; i < nWndNodes; i++) { 198 tmpNode = rf_AllocDAGNode(); 199 tmpNode->list_next = dag_h->nodes; 200 
dag_h->nodes = tmpNode; 201 } 202 wndNodes = dag_h->nodes; 203 204 xorNode = rf_AllocDAGNode(); 205 xorNode->list_next = dag_h->nodes; 206 dag_h->nodes = xorNode; 207 208 wnpNode = rf_AllocDAGNode(); 209 wnpNode->list_next = dag_h->nodes; 210 dag_h->nodes = wnpNode; 211 212 blockNode = rf_AllocDAGNode(); 213 blockNode->list_next = dag_h->nodes; 214 dag_h->nodes = blockNode; 215 216 commitNode = rf_AllocDAGNode(); 217 commitNode->list_next = dag_h->nodes; 218 dag_h->nodes = commitNode; 219 220 termNode = rf_AllocDAGNode(); 221 termNode->list_next = dag_h->nodes; 222 dag_h->nodes = termNode; 223 224#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 225 if (nfaults == 2) { 226 wnqNode = rf_AllocDAGNode(); 227 } else { 228#endif 229 wnqNode = NULL; 230#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 231 } 232#endif 233 rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, 234 new_asm_h, &nRodNodes, &sosBuffer, 235 &eosBuffer, allocList); 236 if (nRodNodes > 0) { 237 for (i = 0; i < nRodNodes; i++) { 238 tmpNode = rf_AllocDAGNode(); 239 tmpNode->list_next = dag_h->nodes; 240 dag_h->nodes = tmpNode; 241 } 242 rodNodes = dag_h->nodes; 243 } else { 244 rodNodes = NULL; 245 } 246 247 /* begin node initialization */ 248 if (nRodNodes > 0) { 249 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 250 rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, 251 dag_h, "Nil", allocList); 252 } else { 253 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 254 rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, 255 dag_h, "Nil", allocList); 256 } 257 258 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, 259 rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0, 260 dag_h, "Cmt", allocList); 261 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, 262 rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, 263 dag_h, "Trm", allocList); 264 265 /* initialize the Rod nodes */ 266 tmpNode = rodNodes; 267 for (nodeNum = asmNum = 0; asmNum < 2; 
asmNum++) { 268 if (new_asm_h[asmNum]) { 269 pda = new_asm_h[asmNum]->stripeMap->physInfo; 270 while (pda) { 271 rf_InitNode(tmpNode, rf_wait, 272 RF_FALSE, rf_DiskReadFunc, 273 rf_DiskReadUndoFunc, 274 rf_GenericWakeupFunc, 275 1, 1, 4, 0, dag_h, 276 "Rod", allocList); 277 tmpNode->params[0].p = pda; 278 tmpNode->params[1].p = pda->bufPtr; 279 tmpNode->params[2].v = parityStripeID; 280 tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 281 which_ru); 282 nodeNum++; 283 pda = pda->next; 284 tmpNode = tmpNode->list_next; 285 } 286 } 287 } 288 RF_ASSERT(nodeNum == nRodNodes); 289 290 /* initialize the wnd nodes */ 291 pda = asmap->physInfo; 292 tmpNode = wndNodes; 293 for (i = 0; i < nWndNodes; i++) { 294 rf_InitNode(tmpNode, rf_wait, RF_FALSE, 295 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 296 rf_GenericWakeupFunc, 1, 1, 4, 0, 297 dag_h, "Wnd", allocList); 298 RF_ASSERT(pda != NULL); 299 tmpNode->params[0].p = pda; 300 tmpNode->params[1].p = pda->bufPtr; 301 tmpNode->params[2].v = parityStripeID; 302 tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 303 pda = pda->next; 304 tmpNode = tmpNode->list_next; 305 } 306 307 /* initialize the redundancy node */ 308 if (nRodNodes > 0) { 309 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, 310 rf_NullNodeUndoFunc, NULL, 1, 311 nRodNodes, 2 * (nWndNodes + nRodNodes) + 1, 312 nfaults, dag_h, "Xr ", allocList); 313 } else { 314 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, 315 rf_NullNodeUndoFunc, NULL, 1, 316 1, 2 * (nWndNodes + nRodNodes) + 1, 317 nfaults, dag_h, "Xr ", allocList); 318 } 319 xorNode->flags |= RF_DAGNODE_FLAG_YIELD; 320 tmpNode = wndNodes; 321 for (i = 0; i < nWndNodes; i++) { 322 /* pda */ 323 xorNode->params[2 * i + 0] = tmpNode->params[0]; 324 /* buf ptr */ 325 xorNode->params[2 * i + 1] = tmpNode->params[1]; 326 tmpNode = tmpNode->list_next; 327 } 328 tmpNode = rodNodes; 329 for (i = 0; i < nRodNodes; i++) { 330 /* pda */ 331 xorNode->params[2 * (nWndNodes + i) + 0] = 
tmpNode->params[0]; 332 /* buf ptr */ 333 xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1]; 334 tmpNode = tmpNode->list_next; 335 } 336 /* xor node needs to get at RAID information */ 337 xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; 338 339 /* 340 * Look for an Rod node that reads a complete SU. If none, 341 * alloc a buffer to receive the parity info. Note that we 342 * can't use a new data buffer because it will not have gotten 343 * written when the xor occurs. */ 344 if (allowBufferRecycle) { 345 tmpNode = rodNodes; 346 for (i = 0; i < nRodNodes; i++) { 347 if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) 348 break; 349 tmpNode = tmpNode->list_next; 350 } 351 } 352 if ((!allowBufferRecycle) || (i == nRodNodes)) { 353 RF_MallocAndAdd(xorNode->results[0], 354 rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), 355 (void *), allocList); 356 } else { 357 /* this works because the only way we get here is if 358 allowBufferRecycle is true and we went through the 359 above for loop, and exited via the break before 360 i==nRodNodes was true. That means tmpNode will 361 still point to a valid node -- the one we want for 362 here! */ 363 xorNode->results[0] = tmpNode->params[1].p; 364 } 365 366 /* initialize the Wnp node */ 367 rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, 368 rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 369 dag_h, "Wnp", allocList); 370 wnpNode->params[0].p = asmap->parityInfo; 371 wnpNode->params[1].p = xorNode->results[0]; 372 wnpNode->params[2].v = parityStripeID; 373 wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 374 /* parityInfo must describe entire parity unit */ 375 RF_ASSERT(asmap->parityInfo->next == NULL); 376 377#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 378 if (nfaults == 2) { 379 /* 380 * We never try to recycle a buffer for the Q calcuation 381 * in addition to the parity. 
This would cause two buffers 382 * to get smashed during the P and Q calculation, guaranteeing 383 * one would be wrong. 384 */ 385 RF_MallocAndAdd(xorNode->results[1], 386 rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), 387 (void *), allocList); 388 rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, 389 rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 390 1, 1, 4, 0, dag_h, "Wnq", allocList); 391 wnqNode->params[0].p = asmap->qInfo; 392 wnqNode->params[1].p = xorNode->results[1]; 393 wnqNode->params[2].v = parityStripeID; 394 wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 395 /* parityInfo must describe entire parity unit */ 396 RF_ASSERT(asmap->parityInfo->next == NULL); 397 } 398#endif 399 /* 400 * Connect nodes to form graph. 401 */ 402 403 /* connect dag header to block node */ 404 RF_ASSERT(blockNode->numAntecedents == 0); 405 dag_h->succedents[0] = blockNode; 406 407 if (nRodNodes > 0) { 408 /* connect the block node to the Rod nodes */ 409 RF_ASSERT(blockNode->numSuccedents == nRodNodes); 410 RF_ASSERT(xorNode->numAntecedents == nRodNodes); 411 tmpNode = rodNodes; 412 for (i = 0; i < nRodNodes; i++) { 413 RF_ASSERT(tmpNode.numAntecedents == 1); 414 blockNode->succedents[i] = tmpNode; 415 tmpNode->antecedents[0] = blockNode; 416 tmpNode->antType[0] = rf_control; 417 418 /* connect the Rod nodes to the Xor node */ 419 RF_ASSERT(tmpNode.numSuccedents == 1); 420 tmpNode->succedents[0] = xorNode; 421 xorNode->antecedents[i] = tmpNode; 422 xorNode->antType[i] = rf_trueData; 423 tmpNode = tmpNode->list_next; 424 } 425 } else { 426 /* connect the block node to the Xor node */ 427 RF_ASSERT(blockNode->numSuccedents == 1); 428 RF_ASSERT(xorNode->numAntecedents == 1); 429 blockNode->succedents[0] = xorNode; 430 xorNode->antecedents[0] = blockNode; 431 xorNode->antType[0] = rf_control; 432 } 433 434 /* connect the xor node to the commit node */ 435 RF_ASSERT(xorNode->numSuccedents == 1); 436 
RF_ASSERT(commitNode->numAntecedents == 1); 437 xorNode->succedents[0] = commitNode; 438 commitNode->antecedents[0] = xorNode; 439 commitNode->antType[0] = rf_control; 440 441 /* connect the commit node to the write nodes */ 442 RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults); 443 tmpNode = wndNodes; 444 for (i = 0; i < nWndNodes; i++) { 445 RF_ASSERT(wndNodes->numAntecedents == 1); 446 commitNode->succedents[i] = tmpNode; 447 tmpNode->antecedents[0] = commitNode; 448 tmpNode->antType[0] = rf_control; 449 tmpNode = tmpNode->list_next; 450 } 451 RF_ASSERT(wnpNode->numAntecedents == 1); 452 commitNode->succedents[nWndNodes] = wnpNode; 453 wnpNode->antecedents[0] = commitNode; 454 wnpNode->antType[0] = rf_trueData; 455#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 456 if (nfaults == 2) { 457 RF_ASSERT(wnqNode->numAntecedents == 1); 458 commitNode->succedents[nWndNodes + 1] = wnqNode; 459 wnqNode->antecedents[0] = commitNode; 460 wnqNode->antType[0] = rf_trueData; 461 } 462#endif 463 /* connect the write nodes to the term node */ 464 RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); 465 RF_ASSERT(termNode->numSuccedents == 0); 466 tmpNode = wndNodes; 467 for (i = 0; i < nWndNodes; i++) { 468 RF_ASSERT(wndNodes->numSuccedents == 1); 469 tmpNode->succedents[0] = termNode; 470 termNode->antecedents[i] = tmpNode; 471 termNode->antType[i] = rf_control; 472 tmpNode = tmpNode->list_next; 473 } 474 RF_ASSERT(wnpNode->numSuccedents == 1); 475 wnpNode->succedents[0] = termNode; 476 termNode->antecedents[nWndNodes] = wnpNode; 477 termNode->antType[nWndNodes] = rf_control; 478#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 479 if (nfaults == 2) { 480 RF_ASSERT(wnqNode->numSuccedents == 1); 481 wnqNode->succedents[0] = termNode; 482 termNode->antecedents[nWndNodes + 1] = wnqNode; 483 termNode->antType[nWndNodes + 1] = rf_control; 484 } 485#endif 486} 487/****************************************************************************** 488 * 489 * 
creates a DAG to perform a small-write operation (either raid 5 or pq), 490 * which is as follows: 491 * 492 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm 493 * \- Rod X / \----> Wnd [Und]-/ 494 * [\- Rod X / \---> Wnd [Und]-/] 495 * [\- Roq -> Q / \--> Wnq [Unq]-/] 496 * 497 * Rop = read old parity 498 * Rod = read old data 499 * Roq = read old "q" 500 * Cmt = commit node 501 * Und = unlock data disk 502 * Unp = unlock parity disk 503 * Unq = unlock q disk 504 * Wnp = write new parity 505 * Wnd = write new data 506 * Wnq = write new "q" 507 * [ ] denotes optional segments in the graph 508 * 509 * Parameters: raidPtr - description of the physical array 510 * asmap - logical & physical addresses for this access 511 * bp - buffer ptr (holds write data) 512 * flags - general flags (e.g. disk locking) 513 * allocList - list of memory allocated in DAG creation 514 * pfuncs - list of parity generating functions 515 * qfuncs - list of q generating functions 516 * 517 * A null qfuncs indicates single fault tolerant 518 *****************************************************************************/ 519 520void 521rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 522 RF_DagHeader_t *dag_h, void *bp, 523 RF_RaidAccessFlags_t flags, 524 RF_AllocListElem_t *allocList, 525 const RF_RedFuncs_t *pfuncs, 526 const RF_RedFuncs_t *qfuncs) 527{ 528 RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; 529 RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode; 530 RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode; 531 RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; 532 RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode; 533 RF_DagNode_t *tmpwriteParityNode; 534#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 535 RF_DagNode_t *tmpwriteQNode; 536#endif 537 int i, j, nNodes, totalNumNodes; 538 RF_ReconUnitNum_t which_ru; 539 int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t 
*); 540 int (*qfunc) (RF_DagNode_t *); 541 int numDataNodes, numParityNodes; 542 RF_StripeNum_t parityStripeID; 543 RF_PhysDiskAddr_t *pda; 544 char *name, *qname; 545 long nfaults; 546 547 nfaults = qfuncs ? 2 : 1; 548 549 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 550 asmap->raidAddress, &which_ru); 551 pda = asmap->physInfo; 552 numDataNodes = asmap->numStripeUnitsAccessed; 553 numParityNodes = (asmap->parityInfo->next) ? 2 : 1; 554 555#if RF_DEBUG_DAG 556 if (rf_dagDebug) { 557 printf("[Creating small-write DAG]\n"); 558 } 559#endif 560 RF_ASSERT(numDataNodes > 0); 561 dag_h->creator = "SmallWriteDAG"; 562 563 dag_h->numCommitNodes = 1; 564 dag_h->numCommits = 0; 565 dag_h->numSuccedents = 1; 566 567 /* 568 * DAG creation occurs in four steps: 569 * 1. count the number of nodes in the DAG 570 * 2. create the nodes 571 * 3. initialize the nodes 572 * 4. connect the nodes 573 */ 574 575 /* 576 * Step 1. compute number of nodes in the graph 577 */ 578 579 /* number of nodes: a read and write for each data unit a 580 * redundancy computation node for each parity node (nfaults * 581 * nparity) a read and write for each parity unit a block and 582 * commit node (2) a terminate node if atomic RMW an unlock 583 * node for each data unit, redundancy unit */ 584 totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) 585 + (nfaults * 2 * numParityNodes) + 3; 586 /* 587 * Step 2. 
create the nodes 588 */ 589 590 blockNode = rf_AllocDAGNode(); 591 blockNode->list_next = dag_h->nodes; 592 dag_h->nodes = blockNode; 593 594 commitNode = rf_AllocDAGNode(); 595 commitNode->list_next = dag_h->nodes; 596 dag_h->nodes = commitNode; 597 598 for (i = 0; i < numDataNodes; i++) { 599 tmpNode = rf_AllocDAGNode(); 600 tmpNode->list_next = dag_h->nodes; 601 dag_h->nodes = tmpNode; 602 } 603 readDataNodes = dag_h->nodes; 604 605 for (i = 0; i < numParityNodes; i++) { 606 tmpNode = rf_AllocDAGNode(); 607 tmpNode->list_next = dag_h->nodes; 608 dag_h->nodes = tmpNode; 609 } 610 readParityNodes = dag_h->nodes; 611 612 for (i = 0; i < numDataNodes; i++) { 613 tmpNode = rf_AllocDAGNode(); 614 tmpNode->list_next = dag_h->nodes; 615 dag_h->nodes = tmpNode; 616 } 617 writeDataNodes = dag_h->nodes; 618 619 for (i = 0; i < numParityNodes; i++) { 620 tmpNode = rf_AllocDAGNode(); 621 tmpNode->list_next = dag_h->nodes; 622 dag_h->nodes = tmpNode; 623 } 624 writeParityNodes = dag_h->nodes; 625 626 for (i = 0; i < numParityNodes; i++) { 627 tmpNode = rf_AllocDAGNode(); 628 tmpNode->list_next = dag_h->nodes; 629 dag_h->nodes = tmpNode; 630 } 631 xorNodes = dag_h->nodes; 632 633 termNode = rf_AllocDAGNode(); 634 termNode->list_next = dag_h->nodes; 635 dag_h->nodes = termNode; 636 637#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 638 if (nfaults == 2) { 639 for (i = 0; i < numParityNodes; i++) { 640 tmpNode = rf_AllocDAGNode(); 641 tmpNode->list_next = dag_h->nodes; 642 dag_h->nodes = tmpNode; 643 } 644 readQNodes = dag_h->nodes; 645 646 for (i = 0; i < numParityNodes; i++) { 647 tmpNode = rf_AllocDAGNode(); 648 tmpNode->list_next = dag_h->nodes; 649 dag_h->nodes = tmpNode; 650 } 651 writeQNodes = dag_h->nodes; 652 653 for (i = 0; i < numParityNodes; i++) { 654 tmpNode = rf_AllocDAGNode(); 655 tmpNode->list_next = dag_h->nodes; 656 dag_h->nodes = tmpNode; 657 } 658 qNodes = dag_h->nodes; 659 } else { 660#endif 661 readQNodes = writeQNodes = qNodes = NULL; 662#if 
(RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 663 } 664#endif 665 RF_ASSERT(i == totalNumNodes); 666 667 /* 668 * Step 3. initialize the nodes 669 */ 670 /* initialize block node (Nil) */ 671 nNodes = numDataNodes + (nfaults * numParityNodes); 672 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 673 rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, 674 dag_h, "Nil", allocList); 675 676 /* initialize commit node (Cmt) */ 677 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, 678 rf_NullNodeUndoFunc, NULL, nNodes, 679 (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList); 680 681 /* initialize terminate node (Trm) */ 682 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, 683 rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, 684 dag_h, "Trm", allocList); 685 686 /* initialize nodes which read old data (Rod) */ 687 tmpreadDataNode = readDataNodes; 688 for (i = 0; i < numDataNodes; i++) { 689 rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE, 690 rf_DiskReadFunc, rf_DiskReadUndoFunc, 691 rf_GenericWakeupFunc, (nfaults * numParityNodes), 692 1, 4, 0, dag_h, "Rod", allocList); 693 RF_ASSERT(pda != NULL); 694 /* physical disk addr desc */ 695 tmpreadDataNode->params[0].p = pda; 696 /* buffer to hold old data */ 697 tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList); 698 tmpreadDataNode->params[2].v = parityStripeID; 699 tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 700 which_ru); 701 pda = pda->next; 702 for (j = 0; j < tmpreadDataNode->numSuccedents; j++) { 703 tmpreadDataNode->propList[j] = NULL; 704 } 705 tmpreadDataNode = tmpreadDataNode->list_next; 706 } 707 708 /* initialize nodes which read old parity (Rop) */ 709 pda = asmap->parityInfo; 710 i = 0; 711 tmpreadParityNode = readParityNodes; 712 for (i = 0; i < numParityNodes; i++) { 713 RF_ASSERT(pda != NULL); 714 rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE, 715 rf_DiskReadFunc, rf_DiskReadUndoFunc, 716 rf_GenericWakeupFunc, numParityNodes, 1, 4, 
0, 717 dag_h, "Rop", allocList); 718 tmpreadParityNode->params[0].p = pda; 719 /* buffer to hold old parity */ 720 tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList); 721 tmpreadParityNode->params[2].v = parityStripeID; 722 tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 723 which_ru); 724 pda = pda->next; 725 for (j = 0; j < tmpreadParityNode->numSuccedents; j++) { 726 tmpreadParityNode->propList[0] = NULL; 727 } 728 tmpreadParityNode = tmpreadParityNode->list_next; 729 } 730 731#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 732 /* initialize nodes which read old Q (Roq) */ 733 if (nfaults == 2) { 734 pda = asmap->qInfo; 735 tmpreadQNode = readQNodes; 736 for (i = 0; i < numParityNodes; i++) { 737 RF_ASSERT(pda != NULL); 738 rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE, 739 rf_DiskReadFunc, rf_DiskReadUndoFunc, 740 rf_GenericWakeupFunc, numParityNodes, 741 1, 4, 0, dag_h, "Roq", allocList); 742 tmpreadQNode->params[0].p = pda; 743 /* buffer to hold old Q */ 744 tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList); 745 tmpreadQNode->params[2].v = parityStripeID; 746 tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 747 which_ru); 748 pda = pda->next; 749 for (j = 0; j < tmpreadQNode->numSuccedents; j++) { 750 tmpreadQNode->propList[0] = NULL; 751 } 752 tmpreadQNode = tmpreadQNode->list_next; 753 } 754 } 755#endif 756 /* initialize nodes which write new data (Wnd) */ 757 pda = asmap->physInfo; 758 tmpwriteDataNode = writeDataNodes; 759 for (i = 0; i < numDataNodes; i++) { 760 RF_ASSERT(pda != NULL); 761 rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE, 762 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 763 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 764 "Wnd", allocList); 765 /* physical disk addr desc */ 766 tmpwriteDataNode->params[0].p = pda; 767 /* buffer holding new data to be written */ 768 tmpwriteDataNode->params[1].p = pda->bufPtr; 769 tmpwriteDataNode->params[2].v = parityStripeID; 
770 tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 771 which_ru); 772 pda = pda->next; 773 tmpwriteDataNode = tmpwriteDataNode->list_next; 774 } 775 776 /* 777 * Initialize nodes which compute new parity and Q. 778 */ 779 /* 780 * We use the simple XOR func in the double-XOR case, and when 781 * we're accessing only a portion of one stripe unit. The 782 * distinction between the two is that the regular XOR func 783 * assumes that the targbuf is a full SU in size, and examines 784 * the pda associated with the buffer to decide where within 785 * the buffer to XOR the data, whereas the simple XOR func 786 * just XORs the data into the start of the buffer. */ 787 if ((numParityNodes == 2) || ((numDataNodes == 1) 788 && (asmap->totalSectorsAccessed < 789 raidPtr->Layout.sectorsPerStripeUnit))) { 790 func = pfuncs->simple; 791 undoFunc = rf_NullNodeUndoFunc; 792 name = pfuncs->SimpleName; 793 if (qfuncs) { 794 qfunc = qfuncs->simple; 795 qname = qfuncs->SimpleName; 796 } else { 797 qfunc = NULL; 798 qname = NULL; 799 } 800 } else { 801 func = pfuncs->regular; 802 undoFunc = rf_NullNodeUndoFunc; 803 name = pfuncs->RegularName; 804 if (qfuncs) { 805 qfunc = qfuncs->regular; 806 qname = qfuncs->RegularName; 807 } else { 808 qfunc = NULL; 809 qname = NULL; 810 } 811 } 812 /* 813 * Initialize the xor nodes: params are {pda,buf} 814 * from {Rod,Wnd,Rop} nodes, and raidPtr 815 */ 816 if (numParityNodes == 2) { 817 /* double-xor case */ 818 tmpxorNode = xorNodes; 819 tmpreadDataNode = readDataNodes; 820 tmpreadParityNode = readParityNodes; 821 tmpwriteDataNode = writeDataNodes; 822 tmpqNode = qNodes; 823 tmpreadQNode = readQNodes; 824 for (i = 0; i < numParityNodes; i++) { 825 /* note: no wakeup func for xor */ 826 rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func, 827 undoFunc, NULL, 1, 828 (numDataNodes + numParityNodes), 829 7, 1, dag_h, name, allocList); 830 tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD; 831 tmpxorNode->params[0] = 
tmpreadDataNode->params[0]; 832 tmpxorNode->params[1] = tmpreadDataNode->params[1]; 833 tmpxorNode->params[2] = tmpreadParityNode->params[0]; 834 tmpxorNode->params[3] = tmpreadParityNode->params[1]; 835 tmpxorNode->params[4] = tmpwriteDataNode->params[0]; 836 tmpxorNode->params[5] = tmpwriteDataNode->params[1]; 837 tmpxorNode->params[6].p = raidPtr; 838 /* use old parity buf as target buf */ 839 tmpxorNode->results[0] = tmpreadParityNode->params[1].p; 840#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 841 if (nfaults == 2) { 842 /* note: no wakeup func for qor */ 843 rf_InitNode(tmpqNode, rf_wait, RF_FALSE, 844 qfunc, undoFunc, NULL, 1, 845 (numDataNodes + numParityNodes), 846 7, 1, dag_h, qname, allocList); 847 tmpqNode->params[0] = tmpreadDataNode->params[0]; 848 tmpqNode->params[1] = tmpreadDataNode->params[1]; 849 tmpqNode->params[2] = tmpreadQNode->.params[0]; 850 tmpqNode->params[3] = tmpreadQNode->params[1]; 851 tmpqNode->params[4] = tmpwriteDataNode->params[0]; 852 tmpqNode->params[5] = tmpwriteDataNode->params[1]; 853 tmpqNode->params[6].p = raidPtr; 854 /* use old Q buf as target buf */ 855 tmpqNode->results[0] = tmpreadQNode->params[1].p; 856 tmpqNode = tmpqNode->list_next; 857 tmpreadQNodes = tmpreadQNodes->list_next; 858 } 859#endif 860 tmpxorNode = tmpxorNode->list_next; 861 tmpreadDataNode = tmpreadDataNode->list_next; 862 tmpreadParityNode = tmpreadParityNode->list_next; 863 tmpwriteDataNode = tmpwriteDataNode->list_next; 864 } 865 } else { 866 /* there is only one xor node in this case */ 867 rf_InitNode(xorNodes, rf_wait, RF_FALSE, func, 868 undoFunc, NULL, 1, (numDataNodes + numParityNodes), 869 (2 * (numDataNodes + numDataNodes + 1) + 1), 1, 870 dag_h, name, allocList); 871 xorNodes->flags |= RF_DAGNODE_FLAG_YIELD; 872 tmpreadDataNode = readDataNodes; 873 for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored 874 out the "+1" into the "deal with Rop separately below */ 875 /* set up params related to Rod 
nodes */ 876 xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */ 877 xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */ 878 tmpreadDataNode = tmpreadDataNode->list_next; 879 } 880 /* deal with Rop separately */ 881 xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */ 882 xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */ 883 884 tmpwriteDataNode = writeDataNodes; 885 for (i = 0; i < numDataNodes; i++) { 886 /* set up params related to Wnd and Wnp nodes */ 887 xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ 888 tmpwriteDataNode->params[0]; 889 xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ 890 tmpwriteDataNode->params[1]; 891 tmpwriteDataNode = tmpwriteDataNode->list_next; 892 } 893 /* xor node needs to get at RAID information */ 894 xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; 895 xorNodes->results[0] = readParityNodes->params[1].p; 896#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 897 if (nfaults == 2) { 898 rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc, 899 undoFunc, NULL, 1, 900 (numDataNodes + numParityNodes), 901 (2 * (numDataNodes + numDataNodes + 1) + 1), 1, 902 dag_h, qname, allocList); 903 tmpreadDataNode = readDataNodes; 904 for (i = 0; i < numDataNodes; i++) { 905 /* set up params related to Rod */ 906 qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */ 907 qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */ 908 tmpreadDataNode = tmpreadDataNode->list_next; 909 } 910 /* and read old q */ 911 qNodes->params[2 * numDataNodes + 0] = /* pda */ 912 readQNodes->params[0]; 913 qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */ 914 readQNodes->params[1]; 915 tmpwriteDataNode = writeDataNodes; 916 for (i = 0; i < numDataNodes; i++) { 917 /* set up params related to Wnd nodes */ 918 qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ 919 
tmpwriteDataNode->params[0]; 920 qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ 921 tmpwriteDataNode->params[1]; 922 tmpwriteDataNode = tmpwriteDataNode->list_next; 923 } 924 /* xor node needs to get at RAID information */ 925 qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; 926 qNodes->results[0] = readQNodes->params[1].p; 927 } 928#endif 929 } 930 931 /* initialize nodes which write new parity (Wnp) */ 932 pda = asmap->parityInfo; 933 tmpwriteParityNode = writeParityNodes; 934 tmpxorNode = xorNodes; 935 for (i = 0; i < numParityNodes; i++) { 936 rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE, 937 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 938 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 939 "Wnp", allocList); 940 RF_ASSERT(pda != NULL); 941 tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr) 942 * filled in by xor node */ 943 tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for 944 * parity write 945 * operation */ 946 tmpwriteParityNode->params[2].v = parityStripeID; 947 tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 948 which_ru); 949 pda = pda->next; 950 tmpwriteParityNode = tmpwriteParityNode->list_next; 951 tmpxorNode = tmpxorNode->list_next; 952 } 953 954#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 955 /* initialize nodes which write new Q (Wnq) */ 956 if (nfaults == 2) { 957 pda = asmap->qInfo; 958 tmpwriteQNode = writeQNodes; 959 tmpqNode = qNodes; 960 for (i = 0; i < numParityNodes; i++) { 961 rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE, 962 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 963 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, 964 "Wnq", allocList); 965 RF_ASSERT(pda != NULL); 966 tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr) 967 * filled in by xor node */ 968 tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for 969 * parity write 970 * operation */ 971 tmpwriteQNode->params[2].v = parityStripeID; 972 
tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 973 which_ru); 974 pda = pda->next; 975 tmpwriteQNode = tmpwriteQNode->list_next; 976 tmpqNode = tmpqNode->list_next; 977 } 978 } 979#endif 980 /* 981 * Step 4. connect the nodes. 982 */ 983 984 /* connect header to block node */ 985 dag_h->succedents[0] = blockNode; 986 987 /* connect block node to read old data nodes */ 988 RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); 989 tmpreadDataNode = readDataNodes; 990 for (i = 0; i < numDataNodes; i++) { 991 blockNode->succedents[i] = tmpreadDataNode; 992 RF_ASSERT(tmpreadDataNode->numAntecedents == 1); 993 tmpreadDataNode->antecedents[0] = blockNode; 994 tmpreadDataNode->antType[0] = rf_control; 995 tmpreadDataNode = tmpreadDataNode->list_next; 996 } 997 998 /* connect block node to read old parity nodes */ 999 tmpreadParityNode = readParityNodes; 1000 for (i = 0; i < numParityNodes; i++) { 1001 blockNode->succedents[numDataNodes + i] = tmpreadParityNode; 1002 RF_ASSERT(tmpreadParityNode->numAntecedents == 1); 1003 tmpreadParityNode->antecedents[0] = blockNode; 1004 tmpreadParityNode->antType[0] = rf_control; 1005 tmpreadParityNode = tmpreadParityNode->list_next; 1006 } 1007 1008#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 1009 /* connect block node to read old Q nodes */ 1010 if (nfaults == 2) { 1011 tmpreadQNode = readQNodes; 1012 for (i = 0; i < numParityNodes; i++) { 1013 blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode; 1014 RF_ASSERT(tmpreadQNode->numAntecedents == 1); 1015 tmpreadQNode->antecedents[0] = blockNode; 1016 tmpreadQNode->antType[0] = rf_control; 1017 tmpreadQNode = tmpreadQNode->list_next; 1018 } 1019 } 1020#endif 1021 /* connect read old data nodes to xor nodes */ 1022 tmpreadDataNode = readDataNodes; 1023 for (i = 0; i < numDataNodes; i++) { 1024 RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes)); 1025 tmpxorNode = xorNodes; 1026 for (j = 0; 
j < numParityNodes; j++) { 1027 RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes); 1028 tmpreadDataNode->succedents[j] = tmpxorNode; 1029 tmpxorNode->antecedents[i] = tmpreadDataNode; 1030 tmpxorNode->antType[i] = rf_trueData; 1031 tmpxorNode = tmpxorNode->list_next; 1032 } 1033 tmpreadDataNode = tmpreadDataNode->list_next; 1034 } 1035 1036#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 1037 /* connect read old data nodes to q nodes */ 1038 if (nfaults == 2) { 1039 tmpreadDataNode = readDataNodes; 1040 for (i = 0; i < numDataNodes; i++) { 1041 tmpqNode = qNodes; 1042 for (j = 0; j < numParityNodes; j++) { 1043 RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes); 1044 tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode; 1045 tmpqNode->antecedents[i] = tmpreadDataNode; 1046 tmpqNode->antType[i] = rf_trueData; 1047 tmpqNode = tmpqNode->list_next; 1048 } 1049 tmpreadDataNode = tmpreadDataNode->list_next; 1050 } 1051 } 1052#endif 1053 /* connect read old parity nodes to xor nodes */ 1054 tmpreadParityNode = readParityNodes; 1055 for (i = 0; i < numParityNodes; i++) { 1056 RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes); 1057 tmpxorNode = xorNodes; 1058 for (j = 0; j < numParityNodes; j++) { 1059 tmpreadParityNode->succedents[j] = tmpxorNode; 1060 tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode; 1061 tmpxorNode->antType[numDataNodes + i] = rf_trueData; 1062 tmpxorNode = tmpxorNode->list_next; 1063 } 1064 tmpreadParityNode = tmpreadParityNode->list_next; 1065 } 1066 1067#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 1068 /* connect read old q nodes to q nodes */ 1069 if (nfaults == 2) { 1070 tmpreadParityNode = readParityNodes; 1071 tmpreadQNode = readQNodes; 1072 for (i = 0; i < numParityNodes; i++) { 1073 RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes); 1074 tmpqNode = qNodes; 1075 for (j = 0; j < numParityNodes; j++) { 1076 tmpreadQNode->succedents[j] = tmpqNode; 1077 
tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes; 1078 tmpqNode->antType[numDataNodes + i] = rf_trueData; 1079 tmpqNode = tmpqNode->list_next; 1080 } 1081 tmpreadParityNode = tmpreadParityNode->list_next; 1082 tmpreadQNode = tmpreadQNode->list_next; 1083 } 1084 } 1085#endif 1086 /* connect xor nodes to commit node */ 1087 RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes)); 1088 tmpxorNode = xorNodes; 1089 for (i = 0; i < numParityNodes; i++) { 1090 RF_ASSERT(tmpxorNode->numSuccedents == 1); 1091 tmpxorNode->succedents[0] = commitNode; 1092 commitNode->antecedents[i] = tmpxorNode; 1093 commitNode->antType[i] = rf_control; 1094 tmpxorNode = tmpxorNode->list_next; 1095 } 1096 1097#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 1098 /* connect q nodes to commit node */ 1099 if (nfaults == 2) { 1100 tmpqNode = qNodes; 1101 for (i = 0; i < numParityNodes; i++) { 1102 RF_ASSERT(tmpqNode->numSuccedents == 1); 1103 tmpqNode->succedents[0] = commitNode; 1104 commitNode->antecedents[i + numParityNodes] = tmpqNode; 1105 commitNode->antType[i + numParityNodes] = rf_control; 1106 tmpqNode = tmpqNode->list_next; 1107 } 1108 } 1109#endif 1110 /* connect commit node to write nodes */ 1111 RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes))); 1112 tmpwriteDataNode = writeDataNodes; 1113 for (i = 0; i < numDataNodes; i++) { 1114 RF_ASSERT(tmpwriteDataNodes->numAntecedents == 1); 1115 commitNode->succedents[i] = tmpwriteDataNode; 1116 tmpwriteDataNode->antecedents[0] = commitNode; 1117 tmpwriteDataNode->antType[0] = rf_trueData; 1118 tmpwriteDataNode = tmpwriteDataNode->list_next; 1119 } 1120 tmpwriteParityNode = writeParityNodes; 1121 for (i = 0; i < numParityNodes; i++) { 1122 RF_ASSERT(tmpwriteParityNode->numAntecedents == 1); 1123 commitNode->succedents[i + numDataNodes] = tmpwriteParityNode; 1124 tmpwriteParityNode->antecedents[0] = commitNode; 1125 tmpwriteParityNode->antType[0] = rf_trueData; 1126 tmpwriteParityNode 
= tmpwriteParityNode->list_next; 1127 } 1128#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 1129 if (nfaults == 2) { 1130 tmpwriteQNode = writeQNodes; 1131 for (i = 0; i < numParityNodes; i++) { 1132 RF_ASSERT(tmpwriteQNode->numAntecedents == 1); 1133 commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode; 1134 tmpwriteQNode->antecedents[0] = commitNode; 1135 tmpwriteQNode->antType[0] = rf_trueData; 1136 tmpwriteQNode = tmpwriteQNode->list_next; 1137 } 1138 } 1139#endif 1140 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); 1141 RF_ASSERT(termNode->numSuccedents == 0); 1142 tmpwriteDataNode = writeDataNodes; 1143 for (i = 0; i < numDataNodes; i++) { 1144 /* connect write new data nodes to term node */ 1145 RF_ASSERT(tmpwriteDataNode->numSuccedents == 1); 1146 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); 1147 tmpwriteDataNode->succedents[0] = termNode; 1148 termNode->antecedents[i] = tmpwriteDataNode; 1149 termNode->antType[i] = rf_control; 1150 tmpwriteDataNode = tmpwriteDataNode->list_next; 1151 } 1152 1153 tmpwriteParityNode = writeParityNodes; 1154 for (i = 0; i < numParityNodes; i++) { 1155 RF_ASSERT(tmpwriteParityNode->numSuccedents == 1); 1156 tmpwriteParityNode->succedents[0] = termNode; 1157 termNode->antecedents[numDataNodes + i] = tmpwriteParityNode; 1158 termNode->antType[numDataNodes + i] = rf_control; 1159 tmpwriteParityNode = tmpwriteParityNode->list_next; 1160 } 1161 1162#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 1163 if (nfaults == 2) { 1164 tmpwriteQNode = writeQNodes; 1165 for (i = 0; i < numParityNodes; i++) { 1166 RF_ASSERT(tmpwriteQNode->numSuccedents == 1); 1167 tmpwriteQNode->succedents[0] = termNode; 1168 termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode; 1169 termNode->antType[numDataNodes + numParityNodes + i] = rf_control; 1170 tmpwriteQNode = tmpwriteQNode->list_next; 1171 } 1172 } 1173#endif 1174} 1175 
1176 1177/****************************************************************************** 1178 * create a write graph (fault-free or degraded) for RAID level 1 1179 * 1180 * Hdr -> Commit -> Wpd -> Nil -> Trm 1181 * -> Wsd -> 1182 * 1183 * The "Wpd" node writes data to the primary copy in the mirror pair 1184 * The "Wsd" node writes data to the secondary copy in the mirror pair 1185 * 1186 * Parameters: raidPtr - description of the physical array 1187 * asmap - logical & physical addresses for this access 1188 * bp - buffer ptr (holds write data) 1189 * flags - general flags (e.g. disk locking) 1190 * allocList - list of memory allocated in DAG creation 1191 *****************************************************************************/ 1192 1193void 1194rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, 1195 RF_DagHeader_t *dag_h, void *bp, 1196 RF_RaidAccessFlags_t flags, 1197 RF_AllocListElem_t *allocList) 1198{ 1199 RF_DagNode_t *unblockNode, *termNode, *commitNode; 1200 RF_DagNode_t *wndNode, *wmirNode; 1201 RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode; 1202 int nWndNodes, nWmirNodes, i; 1203 RF_ReconUnitNum_t which_ru; 1204 RF_PhysDiskAddr_t *pda, *pdaP; 1205 RF_StripeNum_t parityStripeID; 1206 1207 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), 1208 asmap->raidAddress, &which_ru); 1209#if RF_DEBUG_DAG 1210 if (rf_dagDebug) { 1211 printf("[Creating RAID level 1 write DAG]\n"); 1212 } 1213#endif 1214 dag_h->creator = "RaidOneWriteDAG"; 1215 1216 /* 2 implies access not SU aligned */ 1217 nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; 1218 nWndNodes = (asmap->physInfo->next) ? 
2 : 1; 1219 1220 /* alloc the Wnd nodes and the Wmir node */ 1221 if (asmap->numDataFailed == 1) 1222 nWndNodes--; 1223 if (asmap->numParityFailed == 1) 1224 nWmirNodes--; 1225 1226 /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock 1227 * + terminator) */ 1228 for (i = 0; i < nWndNodes; i++) { 1229 tmpNode = rf_AllocDAGNode(); 1230 tmpNode->list_next = dag_h->nodes; 1231 dag_h->nodes = tmpNode; 1232 } 1233 wndNode = dag_h->nodes; 1234 1235 for (i = 0; i < nWmirNodes; i++) { 1236 tmpNode = rf_AllocDAGNode(); 1237 tmpNode->list_next = dag_h->nodes; 1238 dag_h->nodes = tmpNode; 1239 } 1240 wmirNode = dag_h->nodes; 1241 1242 commitNode = rf_AllocDAGNode(); 1243 commitNode->list_next = dag_h->nodes; 1244 dag_h->nodes = commitNode; 1245 1246 unblockNode = rf_AllocDAGNode(); 1247 unblockNode->list_next = dag_h->nodes; 1248 dag_h->nodes = unblockNode; 1249 1250 termNode = rf_AllocDAGNode(); 1251 termNode->list_next = dag_h->nodes; 1252 dag_h->nodes = termNode; 1253 1254 /* this dag can commit immediately */ 1255 dag_h->numCommitNodes = 1; 1256 dag_h->numCommits = 0; 1257 dag_h->numSuccedents = 1; 1258 1259 /* initialize the commit, unblock, and term nodes */ 1260 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, 1261 rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 1262 0, 0, 0, dag_h, "Cmt", allocList); 1263 rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 1264 rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 1265 0, 0, dag_h, "Nil", allocList); 1266 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, 1267 rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, 1268 dag_h, "Trm", allocList); 1269 1270 /* initialize the wnd nodes */ 1271 if (nWndNodes > 0) { 1272 pda = asmap->physInfo; 1273 tmpwndNode = wndNode; 1274 for (i = 0; i < nWndNodes; i++) { 1275 rf_InitNode(tmpwndNode, rf_wait, RF_FALSE, 1276 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 1277 rf_GenericWakeupFunc, 1, 1, 4, 0, 1278 dag_h, "Wpd", allocList); 1279 RF_ASSERT(pda 
!= NULL); 1280 tmpwndNode->params[0].p = pda; 1281 tmpwndNode->params[1].p = pda->bufPtr; 1282 tmpwndNode->params[2].v = parityStripeID; 1283 tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 1284 pda = pda->next; 1285 tmpwndNode = tmpwndNode->list_next; 1286 } 1287 RF_ASSERT(pda == NULL); 1288 } 1289 /* initialize the mirror nodes */ 1290 if (nWmirNodes > 0) { 1291 pda = asmap->physInfo; 1292 pdaP = asmap->parityInfo; 1293 tmpwmirNode = wmirNode; 1294 for (i = 0; i < nWmirNodes; i++) { 1295 rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE, 1296 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 1297 rf_GenericWakeupFunc, 1, 1, 4, 0, 1298 dag_h, "Wsd", allocList); 1299 RF_ASSERT(pda != NULL); 1300 tmpwmirNode->params[0].p = pdaP; 1301 tmpwmirNode->params[1].p = pda->bufPtr; 1302 tmpwmirNode->params[2].v = parityStripeID; 1303 tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 1304 pda = pda->next; 1305 pdaP = pdaP->next; 1306 tmpwmirNode = tmpwmirNode->list_next; 1307 } 1308 RF_ASSERT(pda == NULL); 1309 RF_ASSERT(pdaP == NULL); 1310 } 1311 /* link the header node to the commit node */ 1312 RF_ASSERT(dag_h->numSuccedents == 1); 1313 RF_ASSERT(commitNode->numAntecedents == 0); 1314 dag_h->succedents[0] = commitNode; 1315 1316 /* link the commit node to the write nodes */ 1317 RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes)); 1318 tmpwndNode = wndNode; 1319 for (i = 0; i < nWndNodes; i++) { 1320 RF_ASSERT(tmpwndNode->numAntecedents == 1); 1321 commitNode->succedents[i] = tmpwndNode; 1322 tmpwndNode->antecedents[0] = commitNode; 1323 tmpwndNode->antType[0] = rf_control; 1324 tmpwndNode = tmpwndNode->list_next; 1325 } 1326 tmpwmirNode = wmirNode; 1327 for (i = 0; i < nWmirNodes; i++) { 1328 RF_ASSERT(tmpwmirNode->numAntecedents == 1); 1329 commitNode->succedents[i + nWndNodes] = tmpwmirNode; 1330 tmpwmirNode->antecedents[0] = commitNode; 1331 tmpwmirNode->antType[0] = rf_control; 1332 tmpwmirNode = 
tmpwmirNode->list_next; 1333 } 1334 1335 /* link the write nodes to the unblock node */ 1336 RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); 1337 tmpwndNode = wndNode; 1338 for (i = 0; i < nWndNodes; i++) { 1339 RF_ASSERT(tmpwndNode->numSuccedents == 1); 1340 tmpwndNode->succedents[0] = unblockNode; 1341 unblockNode->antecedents[i] = tmpwndNode; 1342 unblockNode->antType[i] = rf_control; 1343 tmpwndNode = tmpwndNode->list_next; 1344 } 1345 tmpwmirNode = wmirNode; 1346 for (i = 0; i < nWmirNodes; i++) { 1347 RF_ASSERT(tmpwmirNode->numSuccedents == 1); 1348 tmpwmirNode->succedents[0] = unblockNode; 1349 unblockNode->antecedents[i + nWndNodes] = tmpwmirNode; 1350 unblockNode->antType[i + nWndNodes] = rf_control; 1351 tmpwmirNode = tmpwmirNode->list_next; 1352 } 1353 1354 /* link the unblock node to the term node */ 1355 RF_ASSERT(unblockNode->numSuccedents == 1); 1356 RF_ASSERT(termNode->numAntecedents == 1); 1357 RF_ASSERT(termNode->numSuccedents == 0); 1358 unblockNode->succedents[0] = termNode; 1359 termNode->antecedents[0] = unblockNode; 1360 termNode->antType[0] = rf_control; 1361} 1362