1/*	$NetBSD: rf_dagffwr.c,v 1.22 2004/03/18 16:40:05 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/*
30 * rf_dagffwr.c
31 *
32 * code for creating fault-free write DAGs
33 *
34 */
35
36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.22 2004/03/18 16:40:05 oster Exp $");
38
39#include <dev/raidframe/raidframevar.h>
40
41#include "rf_raid.h"
42#include "rf_dag.h"
43#include "rf_dagutils.h"
44#include "rf_dagfuncs.h"
45#include "rf_debugMem.h"
46#include "rf_dagffrd.h"
47#include "rf_general.h"
48#include "rf_dagffwr.h"
49
50/******************************************************************************
51 *
52 * General comments on DAG creation:
53 *
54 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
55 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
56 * is reached, the execution engine will halt forward execution and work
57 * backward through the graph, executing the undo functions.  Assuming that
58 * each node in the graph prior to the Cmt node is undoable and atomic - or -
59 * makes no changes to permanent state, the graph will fail atomically.
60 * If an error occurs after the Cmt node executes, the engine will roll-forward
61 * through the graph, blindly executing nodes until it reaches the end.
62 * If a graph reaches the end, it is assumed to have completed successfully.
63 *
64 * A graph has only 1 Cmt node.
65 *
66 */
67
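/*
 * Illustrative sketch (not compiled, hence the #if 0 guard): the commit
 * point of a roll-away graph is an ordinary no-op node whose "commit"
 * argument (the third parameter to rf_InitNode() as used throughout this
 * file) is RF_TRUE.  Nodes created before it pass RF_FALSE and must be
 * undoable; nodes after it are only ever rolled forward.  nSuccedents
 * and nAntecedents below are placeholders.
 */
#if 0
	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nSuccedents, nAntecedents,
		    0, 0, dag_h, "Cmt", allocList);
#endif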
68
69/******************************************************************************
70 *
71 * The following wrappers map the standard DAG creation interface to the
72 * DAG creation routines.  Additionally, these wrappers enable experimentation
73 * with new DAG structures by providing an extra level of indirection, allowing
74 * the DAG creation routines to be replaced at this single point.
75 */
76
77
78void
79rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
80			      RF_DagHeader_t *dag_h, void *bp,
81			      RF_RaidAccessFlags_t flags,
82			      RF_AllocListElem_t *allocList,
83			      RF_IoType_t type)
84{
85	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
86				 RF_IO_TYPE_WRITE);
87}
88
89void
90rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
91		       RF_DagHeader_t *dag_h, void *bp,
92		       RF_RaidAccessFlags_t flags,
93		       RF_AllocListElem_t *allocList,
94		       RF_IoType_t type)
95{
96	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
97				 RF_IO_TYPE_WRITE);
98}
99
100void
101rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
102		       RF_DagHeader_t *dag_h, void *bp,
103		       RF_RaidAccessFlags_t flags,
104		       RF_AllocListElem_t *allocList)
105{
106	/* "normal" rollaway */
107	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
108				     allocList, &rf_xorFuncs, NULL);
109}
110
111void
112rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
113		       RF_DagHeader_t *dag_h, void *bp,
114		       RF_RaidAccessFlags_t flags,
115		       RF_AllocListElem_t *allocList)
116{
117	/* "normal" rollaway */
118	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
119				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
120}
121
122
123/******************************************************************************
124 *
125 * DAG creation code begins here
126 */
127
128
129/******************************************************************************
130 *
131 * creates a DAG to perform a large-write operation:
132 *
133 *           / Rod \           / Wnd \
134 * H -- block- Rod - Xor - Cmt - Wnd --- T
135 *           \ Rod /          \  Wnp /
136 *                             \[Wnq]/
137 *
138 * The XOR node also does the Q calculation in the P+Q architecture.
139 * All nodes before the commit node (Cmt) are assumed to be atomic and
140 * undoable - or - they make no changes to permanent state.
141 *
142 * Rod = read old data
143 * Cmt = commit node
144 * Wnp = write new parity
145 * Wnd = write new data
146 * Wnq = write new "q"
147 * [] denotes optional segments in the graph
148 *
149 * Parameters:  raidPtr   - description of the physical array
150 *              asmap     - logical & physical addresses for this access
151 *              bp        - buffer ptr (holds write data)
152 *              flags     - general flags (e.g. disk locking)
153 *              allocList - list of memory allocated in DAG creation
154 *              nfaults   - number of faults array can tolerate
155 *                          (equal to # redundancy units in stripe)
156 *              redfuncs  - list of redundancy generating functions
157 *
158 *****************************************************************************/
159
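/*
 * Parameter convention (illustrative sketch, not compiled): every disk
 * I/O node built in the routines below is given the same four params,
 * in this order.  "node" and "pda" are placeholders here.
 */
#if 0
	node->params[0].p = pda;		/* physical disk address */
	node->params[1].p = pda->bufPtr;	/* data buffer */
	node->params[2].v = parityStripeID;
	node->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
#endif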
160void
161rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
162			     RF_DagHeader_t *dag_h, void *bp,
163			     RF_RaidAccessFlags_t flags,
164			     RF_AllocListElem_t *allocList,
165			     int nfaults, int (*redFunc) (RF_DagNode_t *),
166			     int allowBufferRecycle)
167{
168	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
169	RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
170	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
171	RF_AccessStripeMapHeader_t *new_asm_h[2];
172	RF_StripeNum_t parityStripeID;
173	char   *sosBuffer, *eosBuffer;
174	RF_ReconUnitNum_t which_ru;
175	RF_RaidLayout_t *layoutPtr;
176	RF_PhysDiskAddr_t *pda;
177
178	layoutPtr = &(raidPtr->Layout);
179	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
180							asmap->raidAddress,
181							&which_ru);
182
183#if RF_DEBUG_DAG
184	if (rf_dagDebug) {
185		printf("[Creating large-write DAG]\n");
186	}
187#endif
188	dag_h->creator = "LargeWriteDAG";
189
190	dag_h->numCommitNodes = 1;
191	dag_h->numCommits = 0;
192	dag_h->numSuccedents = 1;
193
194	/* alloc the nodes: Wnd, xor, commit, block, term, Wnp, and (if nfaults == 2) Wnq */
195	nWndNodes = asmap->numStripeUnitsAccessed;
196
197	for (i = 0; i < nWndNodes; i++) {
198		tmpNode = rf_AllocDAGNode();
199		tmpNode->list_next = dag_h->nodes;
200		dag_h->nodes = tmpNode;
201	}
202	wndNodes = dag_h->nodes;
203
204	xorNode = rf_AllocDAGNode();
205	xorNode->list_next = dag_h->nodes;
206	dag_h->nodes = xorNode;
207
208	wnpNode = rf_AllocDAGNode();
209	wnpNode->list_next = dag_h->nodes;
210	dag_h->nodes = wnpNode;
211
212	blockNode = rf_AllocDAGNode();
213	blockNode->list_next = dag_h->nodes;
214	dag_h->nodes = blockNode;
215
216	commitNode = rf_AllocDAGNode();
217	commitNode->list_next = dag_h->nodes;
218	dag_h->nodes = commitNode;
219
220	termNode = rf_AllocDAGNode();
221	termNode->list_next = dag_h->nodes;
222	dag_h->nodes = termNode;
223
224#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
225	if (nfaults == 2) {
226		wnqNode = rf_AllocDAGNode();
227	} else {
228#endif
229		wnqNode = NULL;
230#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
231	}
232#endif
233	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
234					new_asm_h, &nRodNodes, &sosBuffer,
235					&eosBuffer, allocList);
236	if (nRodNodes > 0) {
237		for (i = 0; i < nRodNodes; i++) {
238			tmpNode = rf_AllocDAGNode();
239			tmpNode->list_next = dag_h->nodes;
240			dag_h->nodes = tmpNode;
241		}
242		rodNodes = dag_h->nodes;
243	} else {
244		rodNodes = NULL;
245	}
246
247	/* begin node initialization */
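	/*
	 * Argument order for rf_InitNode() as used below (inferred from
	 * these calls; see rf_dagutils.h for the authoritative prototype):
	 * node, initial state, commit flag, do func, undo func, wakeup
	 * func, nSuccedents, nAntecedents, nParams, nResults, dag header,
	 * node name, allocation list.
	 */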
248	if (nRodNodes > 0) {
249		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
250			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
251			    dag_h, "Nil", allocList);
252	} else {
253		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
254			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
255			    dag_h, "Nil", allocList);
256	}
257
258	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
259		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
260		    dag_h, "Cmt", allocList);
261	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
262		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
263		    dag_h, "Trm", allocList);
264
265	/* initialize the Rod nodes */
266	tmpNode = rodNodes;
267	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
268		if (new_asm_h[asmNum]) {
269			pda = new_asm_h[asmNum]->stripeMap->physInfo;
270			while (pda) {
271				rf_InitNode(tmpNode, rf_wait,
272					    RF_FALSE, rf_DiskReadFunc,
273					    rf_DiskReadUndoFunc,
274					    rf_GenericWakeupFunc,
275					    1, 1, 4, 0, dag_h,
276					    "Rod", allocList);
277				tmpNode->params[0].p = pda;
278				tmpNode->params[1].p = pda->bufPtr;
279				tmpNode->params[2].v = parityStripeID;
280				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
281				    which_ru);
282				nodeNum++;
283				pda = pda->next;
284				tmpNode = tmpNode->list_next;
285			}
286		}
287	}
288	RF_ASSERT(nodeNum == nRodNodes);
289
290	/* initialize the wnd nodes */
291	pda = asmap->physInfo;
292	tmpNode = wndNodes;
293	for (i = 0; i < nWndNodes; i++) {
294		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
295			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
296			    rf_GenericWakeupFunc, 1, 1, 4, 0,
297			    dag_h, "Wnd", allocList);
298		RF_ASSERT(pda != NULL);
299		tmpNode->params[0].p = pda;
300		tmpNode->params[1].p = pda->bufPtr;
301		tmpNode->params[2].v = parityStripeID;
302		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
303		pda = pda->next;
304		tmpNode = tmpNode->list_next;
305	}
306
307	/* initialize the redundancy node */
308	if (nRodNodes > 0) {
309		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
310			    rf_NullNodeUndoFunc, NULL, 1,
311			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
312			    nfaults, dag_h, "Xr ", allocList);
313	} else {
314		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
315			    rf_NullNodeUndoFunc, NULL, 1,
316			    1, 2 * (nWndNodes + nRodNodes) + 1,
317			    nfaults, dag_h, "Xr ", allocList);
318	}
319	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
320	tmpNode = wndNodes;
321	for (i = 0; i < nWndNodes; i++) {
322		/* pda */
323		xorNode->params[2 * i + 0] = tmpNode->params[0];
324		/* buf ptr */
325		xorNode->params[2 * i + 1] = tmpNode->params[1];
326		tmpNode = tmpNode->list_next;
327	}
328	tmpNode = rodNodes;
329	for (i = 0; i < nRodNodes; i++) {
330		/* pda */
331		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
332		/* buf ptr */
333		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
334		tmpNode = tmpNode->list_next;
335	}
336	/* xor node needs to get at RAID information */
337	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
338
339	/*
340         * Look for a Rod node that reads a complete SU. If none,
341         * alloc a buffer to receive the parity info. Note that we
342         * can't use a new data buffer because it will not have gotten
343         * written when the xor occurs.  */
344	if (allowBufferRecycle) {
345		tmpNode = rodNodes;
346		for (i = 0; i < nRodNodes; i++) {
347			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
348				break;
349			tmpNode = tmpNode->list_next;
350		}
351	}
352	if ((!allowBufferRecycle) || (i == nRodNodes)) {
353		RF_MallocAndAdd(xorNode->results[0],
354				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
355				(void *), allocList);
356	} else {
357		/* this works because the only way we get here is if
358		   allowBufferRecycle is true and we went through the
359		   above for loop, and exited via the break before
360		   i==nRodNodes was true.  That means tmpNode will
361		   still point to a valid node -- the one we want for
362		   here! */
363		xorNode->results[0] = tmpNode->params[1].p;
364	}
365
366	/* initialize the Wnp node */
367	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
368		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
369		    dag_h, "Wnp", allocList);
370	wnpNode->params[0].p = asmap->parityInfo;
371	wnpNode->params[1].p = xorNode->results[0];
372	wnpNode->params[2].v = parityStripeID;
373	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
374	/* parityInfo must describe entire parity unit */
375	RF_ASSERT(asmap->parityInfo->next == NULL);
376
377#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
378	if (nfaults == 2) {
379		/*
380	         * We never try to recycle a buffer for the Q calculation
381	         * in addition to the parity. This would cause two buffers
382	         * to get smashed during the P and Q calculation, guaranteeing
383	         * one would be wrong.
384	         */
385		RF_MallocAndAdd(xorNode->results[1],
386				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
387				(void *), allocList);
388		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
389			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
390			    1, 1, 4, 0, dag_h, "Wnq", allocList);
391		wnqNode->params[0].p = asmap->qInfo;
392		wnqNode->params[1].p = xorNode->results[1];
393		wnqNode->params[2].v = parityStripeID;
394		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
395		/* parityInfo must describe entire parity unit */
396		RF_ASSERT(asmap->parityInfo->next == NULL);
397	}
398#endif
399	/*
400         * Connect nodes to form graph.
401         */
402
403	/* connect dag header to block node */
404	RF_ASSERT(blockNode->numAntecedents == 0);
405	dag_h->succedents[0] = blockNode;
406
407	if (nRodNodes > 0) {
408		/* connect the block node to the Rod nodes */
409		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
410		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
411		tmpNode = rodNodes;
412		for (i = 0; i < nRodNodes; i++) {
413			RF_ASSERT(tmpNode->numAntecedents == 1);
414			blockNode->succedents[i] = tmpNode;
415			tmpNode->antecedents[0] = blockNode;
416			tmpNode->antType[0] = rf_control;
417
418			/* connect the Rod nodes to the Xor node */
419			RF_ASSERT(tmpNode->numSuccedents == 1);
420			tmpNode->succedents[0] = xorNode;
421			xorNode->antecedents[i] = tmpNode;
422			xorNode->antType[i] = rf_trueData;
423			tmpNode = tmpNode->list_next;
424		}
425	} else {
426		/* connect the block node to the Xor node */
427		RF_ASSERT(blockNode->numSuccedents == 1);
428		RF_ASSERT(xorNode->numAntecedents == 1);
429		blockNode->succedents[0] = xorNode;
430		xorNode->antecedents[0] = blockNode;
431		xorNode->antType[0] = rf_control;
432	}
433
434	/* connect the xor node to the commit node */
435	RF_ASSERT(xorNode->numSuccedents == 1);
436	RF_ASSERT(commitNode->numAntecedents == 1);
437	xorNode->succedents[0] = commitNode;
438	commitNode->antecedents[0] = xorNode;
439	commitNode->antType[0] = rf_control;
440
441	/* connect the commit node to the write nodes */
442	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
443	tmpNode = wndNodes;
444	for (i = 0; i < nWndNodes; i++) {
445		RF_ASSERT(tmpNode->numAntecedents == 1);
446		commitNode->succedents[i] = tmpNode;
447		tmpNode->antecedents[0] = commitNode;
448		tmpNode->antType[0] = rf_control;
449		tmpNode = tmpNode->list_next;
450	}
451	RF_ASSERT(wnpNode->numAntecedents == 1);
452	commitNode->succedents[nWndNodes] = wnpNode;
453	wnpNode->antecedents[0] = commitNode;
454	wnpNode->antType[0] = rf_trueData;
455#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
456	if (nfaults == 2) {
457		RF_ASSERT(wnqNode->numAntecedents == 1);
458		commitNode->succedents[nWndNodes + 1] = wnqNode;
459		wnqNode->antecedents[0] = commitNode;
460		wnqNode->antType[0] = rf_trueData;
461	}
462#endif
463	/* connect the write nodes to the term node */
464	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
465	RF_ASSERT(termNode->numSuccedents == 0);
466	tmpNode = wndNodes;
467	for (i = 0; i < nWndNodes; i++) {
468		RF_ASSERT(tmpNode->numSuccedents == 1);
469		tmpNode->succedents[0] = termNode;
470		termNode->antecedents[i] = tmpNode;
471		termNode->antType[i] = rf_control;
472		tmpNode = tmpNode->list_next;
473	}
474	RF_ASSERT(wnpNode->numSuccedents == 1);
475	wnpNode->succedents[0] = termNode;
476	termNode->antecedents[nWndNodes] = wnpNode;
477	termNode->antType[nWndNodes] = rf_control;
478#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
479	if (nfaults == 2) {
480		RF_ASSERT(wnqNode->numSuccedents == 1);
481		wnqNode->succedents[0] = termNode;
482		termNode->antecedents[nWndNodes + 1] = wnqNode;
483		termNode->antType[nWndNodes + 1] = rf_control;
484	}
485#endif
486}
487/******************************************************************************
488 *
489 * creates a DAG to perform a small-write operation (either raid 5 or pq),
490 * which is as follows:
491 *
492 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
493 *            \- Rod X      /     \----> Wnd [Und]-/
494 *           [\- Rod X     /       \---> Wnd [Und]-/]
495 *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
496 *
497 * Rop = read old parity
498 * Rod = read old data
499 * Roq = read old "q"
500 * Cmt = commit node
501 * Und = unlock data disk
502 * Unp = unlock parity disk
503 * Unq = unlock q disk
504 * Wnp = write new parity
505 * Wnd = write new data
506 * Wnq = write new "q"
507 * [ ] denotes optional segments in the graph
508 *
509 * Parameters:  raidPtr   - description of the physical array
510 *              asmap     - logical & physical addresses for this access
511 *              bp        - buffer ptr (holds write data)
512 *              flags     - general flags (e.g. disk locking)
513 *              allocList - list of memory allocated in DAG creation
514 *              pfuncs    - list of parity generating functions
515 *              qfuncs    - list of q generating functions
516 *
517 * A null qfuncs indicates single fault tolerant
518 *****************************************************************************/
519
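/*
 * The redundancy nodes below implement the usual read-modify-write
 * parity update (per byte, single-fault case):
 *
 *	new parity = old parity ^ old data ^ new data
 *
 * which is why each Xor node is handed the {pda, buffer} pairs of the
 * corresponding Rod, Rop and Wnd nodes plus the raidPtr, and recycles
 * the old-parity buffer as its target.
 */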
520void
521rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
522			     RF_DagHeader_t *dag_h, void *bp,
523			     RF_RaidAccessFlags_t flags,
524			     RF_AllocListElem_t *allocList,
525			     const RF_RedFuncs_t *pfuncs,
526			     const RF_RedFuncs_t *qfuncs)
527{
528	RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
529	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
530	RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
531	RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
532	RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
533	RF_DagNode_t *tmpwriteParityNode;
534#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
535	RF_DagNode_t *tmpwriteQNode;
536#endif
537	int     i, j, nNodes, totalNumNodes;
538	RF_ReconUnitNum_t which_ru;
539	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
540	int     (*qfunc) (RF_DagNode_t *);
541	int     numDataNodes, numParityNodes;
542	RF_StripeNum_t parityStripeID;
543	RF_PhysDiskAddr_t *pda;
544	char   *name, *qname;
545	long    nfaults;
546
547	nfaults = qfuncs ? 2 : 1;
548
549	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
550	    asmap->raidAddress, &which_ru);
551	pda = asmap->physInfo;
552	numDataNodes = asmap->numStripeUnitsAccessed;
553	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
554
555#if RF_DEBUG_DAG
556	if (rf_dagDebug) {
557		printf("[Creating small-write DAG]\n");
558	}
559#endif
560	RF_ASSERT(numDataNodes > 0);
561	dag_h->creator = "SmallWriteDAG";
562
563	dag_h->numCommitNodes = 1;
564	dag_h->numCommits = 0;
565	dag_h->numSuccedents = 1;
566
567	/*
568         * DAG creation occurs in four steps:
569         * 1. count the number of nodes in the DAG
570         * 2. create the nodes
571         * 3. initialize the nodes
572         * 4. connect the nodes
573         */
574
575	/*
576         * Step 1. compute number of nodes in the graph
577         */
578
579	/* number of nodes: a read and a write for each data unit,
580	 * nfaults redundancy computation nodes for each parity unit,
581	 * a read and a write for each parity unit (and for each Q
582	 * unit, if present), plus a block node, a commit node and
583	 * a terminate node (3) */
584	totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
585	    + (nfaults * 2 * numParityNodes) + 3;
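	/*
	 * Worked example: a RAID 5 (nfaults == 1) small write touching a
	 * single data unit with unfragmented parity has numDataNodes == 1
	 * and numParityNodes == 1, so totalNumNodes = 2 + 1 + 2 + 3 = 8:
	 * Rod, Wnd, Rop, Wnp, the Xor node, and the block, commit and
	 * terminate nodes.
	 */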
586	/*
587         * Step 2. create the nodes
588         */
589
590	blockNode = rf_AllocDAGNode();
591	blockNode->list_next = dag_h->nodes;
592	dag_h->nodes = blockNode;
593
594	commitNode = rf_AllocDAGNode();
595	commitNode->list_next = dag_h->nodes;
596	dag_h->nodes = commitNode;
597
598	for (i = 0; i < numDataNodes; i++) {
599		tmpNode = rf_AllocDAGNode();
600		tmpNode->list_next = dag_h->nodes;
601		dag_h->nodes = tmpNode;
602	}
603	readDataNodes = dag_h->nodes;
604
605	for (i = 0; i < numParityNodes; i++) {
606		tmpNode = rf_AllocDAGNode();
607		tmpNode->list_next = dag_h->nodes;
608		dag_h->nodes = tmpNode;
609	}
610	readParityNodes = dag_h->nodes;
611
612	for (i = 0; i < numDataNodes; i++) {
613		tmpNode = rf_AllocDAGNode();
614		tmpNode->list_next = dag_h->nodes;
615		dag_h->nodes = tmpNode;
616	}
617	writeDataNodes = dag_h->nodes;
618
619	for (i = 0; i < numParityNodes; i++) {
620		tmpNode = rf_AllocDAGNode();
621		tmpNode->list_next = dag_h->nodes;
622		dag_h->nodes = tmpNode;
623	}
624	writeParityNodes = dag_h->nodes;
625
626	for (i = 0; i < numParityNodes; i++) {
627		tmpNode = rf_AllocDAGNode();
628		tmpNode->list_next = dag_h->nodes;
629		dag_h->nodes = tmpNode;
630	}
631	xorNodes = dag_h->nodes;
632
633	termNode = rf_AllocDAGNode();
634	termNode->list_next = dag_h->nodes;
635	dag_h->nodes = termNode;
636
637#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
638	if (nfaults == 2) {
639		for (i = 0; i < numParityNodes; i++) {
640			tmpNode = rf_AllocDAGNode();
641			tmpNode->list_next = dag_h->nodes;
642			dag_h->nodes = tmpNode;
643		}
644		readQNodes = dag_h->nodes;
645
646		for (i = 0; i < numParityNodes; i++) {
647			tmpNode = rf_AllocDAGNode();
648			tmpNode->list_next = dag_h->nodes;
649			dag_h->nodes = tmpNode;
650		}
651		writeQNodes = dag_h->nodes;
652
653		for (i = 0; i < numParityNodes; i++) {
654			tmpNode = rf_AllocDAGNode();
655			tmpNode->list_next = dag_h->nodes;
656			dag_h->nodes = tmpNode;
657		}
658		qNodes = dag_h->nodes;
659	} else {
660#endif
661		readQNodes = writeQNodes = qNodes = NULL;
662#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
663	}
664#endif
665	RF_ASSERT(i == totalNumNodes);
666
667	/*
668         * Step 3. initialize the nodes
669         */
670	/* initialize block node (Nil) */
671	nNodes = numDataNodes + (nfaults * numParityNodes);
672	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
673		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
674		    dag_h, "Nil", allocList);
675
676	/* initialize commit node (Cmt) */
677	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
678		    rf_NullNodeUndoFunc, NULL, nNodes,
679		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
680
681	/* initialize terminate node (Trm) */
682	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
683		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
684		    dag_h, "Trm", allocList);
685
686	/* initialize nodes which read old data (Rod) */
687	tmpreadDataNode = readDataNodes;
688	for (i = 0; i < numDataNodes; i++) {
689		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
690			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
691			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
692			    1, 4, 0, dag_h, "Rod", allocList);
693		RF_ASSERT(pda != NULL);
694		/* physical disk addr desc */
695		tmpreadDataNode->params[0].p = pda;
696		/* buffer to hold old data */
697		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
698		tmpreadDataNode->params[2].v = parityStripeID;
699		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
700		    which_ru);
701		pda = pda->next;
702		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
703			tmpreadDataNode->propList[j] = NULL;
704		}
705		tmpreadDataNode = tmpreadDataNode->list_next;
706	}
707
708	/* initialize nodes which read old parity (Rop) */
709	pda = asmap->parityInfo;
710	i = 0;
711	tmpreadParityNode = readParityNodes;
712	for (i = 0; i < numParityNodes; i++) {
713		RF_ASSERT(pda != NULL);
714		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
715			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
716			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
717			    dag_h, "Rop", allocList);
718		tmpreadParityNode->params[0].p = pda;
719		/* buffer to hold old parity */
720		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
721		tmpreadParityNode->params[2].v = parityStripeID;
722		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
723		    which_ru);
724		pda = pda->next;
725		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
726			tmpreadParityNode->propList[j] = NULL;
727		}
728		tmpreadParityNode = tmpreadParityNode->list_next;
729	}
730
731#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
732	/* initialize nodes which read old Q (Roq) */
733	if (nfaults == 2) {
734		pda = asmap->qInfo;
735		tmpreadQNode = readQNodes;
736		for (i = 0; i < numParityNodes; i++) {
737			RF_ASSERT(pda != NULL);
738			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
739				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
740				    rf_GenericWakeupFunc, numParityNodes,
741				    1, 4, 0, dag_h, "Roq", allocList);
742			tmpreadQNode->params[0].p = pda;
743			/* buffer to hold old Q */
744			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
745			tmpreadQNode->params[2].v = parityStripeID;
746			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
747			    which_ru);
748			pda = pda->next;
749			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
750				tmpreadQNode->propList[j] = NULL;
751			}
752			tmpreadQNode = tmpreadQNode->list_next;
753		}
754	}
755#endif
756	/* initialize nodes which write new data (Wnd) */
757	pda = asmap->physInfo;
758	tmpwriteDataNode = writeDataNodes;
759	for (i = 0; i < numDataNodes; i++) {
760		RF_ASSERT(pda != NULL);
761		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
762			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
763			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
764			    "Wnd", allocList);
765		/* physical disk addr desc */
766		tmpwriteDataNode->params[0].p = pda;
767		/* buffer holding new data to be written */
768		tmpwriteDataNode->params[1].p = pda->bufPtr;
769		tmpwriteDataNode->params[2].v = parityStripeID;
770		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
771		    which_ru);
772		pda = pda->next;
773		tmpwriteDataNode = tmpwriteDataNode->list_next;
774	}
775
776	/*
777         * Initialize nodes which compute new parity and Q.
778         */
779	/*
780         * We use the simple XOR func in the double-XOR case, and when
781         * we're accessing only a portion of one stripe unit. The
782         * distinction between the two is that the regular XOR func
783         * assumes that the targbuf is a full SU in size, and examines
784         * the pda associated with the buffer to decide where within
785         * the buffer to XOR the data, whereas the simple XOR func
786         * just XORs the data into the start of the buffer.  */
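	/*
	 * Rough sketch of the difference (assumed semantics; the actual
	 * functions live in rf_dagfuncs.c):
	 *
	 *	regular XOR: dst = targbuf + byte offset of the source
	 *	             pda within its full stripe unit
	 *	simple XOR:  dst = targbuf
	 *
	 * i.e. only the regular variant accounts for where each source
	 * falls within a full-SU target buffer.
	 */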
787	if ((numParityNodes == 2) || ((numDataNodes == 1)
788		&& (asmap->totalSectorsAccessed <
789		    raidPtr->Layout.sectorsPerStripeUnit))) {
790		func = pfuncs->simple;
791		undoFunc = rf_NullNodeUndoFunc;
792		name = pfuncs->SimpleName;
793		if (qfuncs) {
794			qfunc = qfuncs->simple;
795			qname = qfuncs->SimpleName;
796		} else {
797			qfunc = NULL;
798			qname = NULL;
799		}
800	} else {
801		func = pfuncs->regular;
802		undoFunc = rf_NullNodeUndoFunc;
803		name = pfuncs->RegularName;
804		if (qfuncs) {
805			qfunc = qfuncs->regular;
806			qname = qfuncs->RegularName;
807		} else {
808			qfunc = NULL;
809			qname = NULL;
810		}
811	}
812	/*
813         * Initialize the xor nodes: params are {pda,buf}
814         * from {Rod,Wnd,Rop} nodes, and raidPtr
815         */
816	if (numParityNodes == 2) {
817		/* double-xor case */
818		tmpxorNode = xorNodes;
819		tmpreadDataNode = readDataNodes;
820		tmpreadParityNode = readParityNodes;
821		tmpwriteDataNode = writeDataNodes;
822		tmpqNode = qNodes;
823		tmpreadQNode = readQNodes;
824		for (i = 0; i < numParityNodes; i++) {
825			/* note: no wakeup func for xor */
826			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
827				    undoFunc, NULL, 1,
828				    (numDataNodes + numParityNodes),
829				    7, 1, dag_h, name, allocList);
830			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
831			tmpxorNode->params[0] = tmpreadDataNode->params[0];
832			tmpxorNode->params[1] = tmpreadDataNode->params[1];
833			tmpxorNode->params[2] = tmpreadParityNode->params[0];
834			tmpxorNode->params[3] = tmpreadParityNode->params[1];
835			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
836			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
837			tmpxorNode->params[6].p = raidPtr;
838			/* use old parity buf as target buf */
839			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
840#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
841			if (nfaults == 2) {
842				/* note: no wakeup func for qor */
843				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
844					    qfunc, undoFunc, NULL, 1,
845					    (numDataNodes + numParityNodes),
846					    7, 1, dag_h, qname, allocList);
847				tmpqNode->params[0] = tmpreadDataNode->params[0];
848				tmpqNode->params[1] = tmpreadDataNode->params[1];
849				tmpqNode->params[2] = tmpreadQNode->params[0];
850				tmpqNode->params[3] = tmpreadQNode->params[1];
851				tmpqNode->params[4] = tmpwriteDataNode->params[0];
852				tmpqNode->params[5] = tmpwriteDataNode->params[1];
853				tmpqNode->params[6].p = raidPtr;
854				/* use old Q buf as target buf */
855				tmpqNode->results[0] = tmpreadQNode->params[1].p;
856				tmpqNode = tmpqNode->list_next;
857				tmpreadQNode = tmpreadQNode->list_next;
858			}
859#endif
860			tmpxorNode = tmpxorNode->list_next;
861			tmpreadDataNode = tmpreadDataNode->list_next;
862			tmpreadParityNode = tmpreadParityNode->list_next;
863			tmpwriteDataNode = tmpwriteDataNode->list_next;
864		}
865	} else {
866		/* there is only one xor node in this case */
867		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
868			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
869			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
870			    dag_h, name, allocList);
871		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
872		tmpreadDataNode = readDataNodes;
873		for (i = 0; i < numDataNodes; i++) { /* used to be "numDataNodes + 1" until we factored
874							out the "+1" into the "deal with Rop separately" code below */
875			/* set up params related to Rod nodes */
876			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
877			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
878			tmpreadDataNode = tmpreadDataNode->list_next;
879		}
880		/* deal with Rop separately */
881		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
882		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
883
884		tmpwriteDataNode = writeDataNodes;
885		for (i = 0; i < numDataNodes; i++) {
886			/* set up params related to Wnd and Wnp nodes */
887			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
888			    tmpwriteDataNode->params[0];
889			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
890			    tmpwriteDataNode->params[1];
891			tmpwriteDataNode = tmpwriteDataNode->list_next;
892		}
893		/* xor node needs to get at RAID information */
894		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
895		xorNodes->results[0] = readParityNodes->params[1].p;
896#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
897		if (nfaults == 2) {
898			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
899				    undoFunc, NULL, 1,
900				    (numDataNodes + numParityNodes),
901				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
902				    dag_h, qname, allocList);
903			tmpreadDataNode = readDataNodes;
904			for (i = 0; i < numDataNodes; i++) {
905				/* set up params related to Rod */
906				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
907				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
908				tmpreadDataNode = tmpreadDataNode->list_next;
909			}
910			/* and read old q */
911			qNodes->params[2 * numDataNodes + 0] =	/* pda */
912			    readQNodes->params[0];
913			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
914			    readQNodes->params[1];
915			tmpwriteDataNode = writeDataNodes;
916			for (i = 0; i < numDataNodes; i++) {
917				/* set up params related to Wnd nodes */
918				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
919				    tmpwriteDataNode->params[0];
920				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
921				    tmpwriteDataNode->params[1];
922				tmpwriteDataNode = tmpwriteDataNode->list_next;
923			}
924			/* xor node needs to get at RAID information */
925			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
926			qNodes->results[0] = readQNodes->params[1].p;
927		}
928#endif
929	}
930
931	/* initialize nodes which write new parity (Wnp) */
932	pda = asmap->parityInfo;
933	tmpwriteParityNode = writeParityNodes;
934	tmpxorNode = xorNodes;
935	for (i = 0; i < numParityNodes; i++) {
936		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
937			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
938			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
939			    "Wnp", allocList);
940		RF_ASSERT(pda != NULL);
941		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
942				  			 * filled in by xor node */
943		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
944				  						 * parity write
945				  						 * operation */
946		tmpwriteParityNode->params[2].v = parityStripeID;
947		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
948		    which_ru);
949		pda = pda->next;
950		tmpwriteParityNode = tmpwriteParityNode->list_next;
951		tmpxorNode = tmpxorNode->list_next;
952	}
953
954#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
955	/* initialize nodes which write new Q (Wnq) */
956	if (nfaults == 2) {
957		pda = asmap->qInfo;
958		tmpwriteQNode = writeQNodes;
959		tmpqNode = qNodes;
960		for (i = 0; i < numParityNodes; i++) {
961			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
962				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
963				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
964				    "Wnq", allocList);
965			RF_ASSERT(pda != NULL);
966			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
967								 * filled in by xor node */
968			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
969										 * parity write
970										 * operation */
971			tmpwriteQNode->params[2].v = parityStripeID;
972			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
973			    which_ru);
974			pda = pda->next;
975			tmpwriteQNode = tmpwriteQNode->list_next;
976			tmpqNode = tmpqNode->list_next;
977		}
978	}
979#endif
980	/*
981         * Step 4. connect the nodes.
982         */
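	/*
	 * Index layout used in the connections below (it must agree with
	 * the successor/antecedent counts passed to rf_InitNode() above):
	 *
	 *	blockNode succedents:  [0..numDataNodes-1] Rod,
	 *	                       then Rop, then Roq (if any)
	 *	commitNode succedents: [0..numDataNodes-1] Wnd,
	 *	                       then Wnp, then Wnq (if any)
	 *	termNode antecedents:  same order as commitNode succedents
	 */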
983
984	/* connect header to block node */
985	dag_h->succedents[0] = blockNode;
986
987	/* connect block node to read old data nodes */
988	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
989	tmpreadDataNode = readDataNodes;
990	for (i = 0; i < numDataNodes; i++) {
991		blockNode->succedents[i] = tmpreadDataNode;
992		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
993		tmpreadDataNode->antecedents[0] = blockNode;
994		tmpreadDataNode->antType[0] = rf_control;
995		tmpreadDataNode = tmpreadDataNode->list_next;
996	}
997
998	/* connect block node to read old parity nodes */
999	tmpreadParityNode = readParityNodes;
1000	for (i = 0; i < numParityNodes; i++) {
1001		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1002		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1003		tmpreadParityNode->antecedents[0] = blockNode;
1004		tmpreadParityNode->antType[0] = rf_control;
1005		tmpreadParityNode = tmpreadParityNode->list_next;
1006	}
1007
1008#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1009	/* connect block node to read old Q nodes */
1010	if (nfaults == 2) {
1011		tmpreadQNode = readQNodes;
1012		for (i = 0; i < numParityNodes; i++) {
1013			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1014			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1015			tmpreadQNode->antecedents[0] = blockNode;
1016			tmpreadQNode->antType[0] = rf_control;
1017			tmpreadQNode = tmpreadQNode->list_next;
1018		}
1019	}
1020#endif
1021	/* connect read old data nodes to xor nodes */
1022	tmpreadDataNode = readDataNodes;
1023	for (i = 0; i < numDataNodes; i++) {
1024		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1025		tmpxorNode = xorNodes;
1026		for (j = 0; j < numParityNodes; j++) {
1027			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1028			tmpreadDataNode->succedents[j] = tmpxorNode;
1029			tmpxorNode->antecedents[i] = tmpreadDataNode;
1030			tmpxorNode->antType[i] = rf_trueData;
1031			tmpxorNode = tmpxorNode->list_next;
1032		}
1033		tmpreadDataNode = tmpreadDataNode->list_next;
1034	}
1035
1036#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1037	/* connect read old data nodes to q nodes */
1038	if (nfaults == 2) {
1039		tmpreadDataNode = readDataNodes;
1040		for (i = 0; i < numDataNodes; i++) {
1041			tmpqNode = qNodes;
1042			for (j = 0; j < numParityNodes; j++) {
1043				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1044				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1045				tmpqNode->antecedents[i] = tmpreadDataNode;
1046				tmpqNode->antType[i] = rf_trueData;
1047				tmpqNode = tmpqNode->list_next;
1048			}
1049			tmpreadDataNode = tmpreadDataNode->list_next;
1050		}
1051	}
1052#endif
1053	/* connect read old parity nodes to xor nodes */
1054	tmpreadParityNode = readParityNodes;
1055	for (i = 0; i < numParityNodes; i++) {
1056		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1057		tmpxorNode = xorNodes;
1058		for (j = 0; j < numParityNodes; j++) {
1059			tmpreadParityNode->succedents[j] = tmpxorNode;
1060			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1061			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1062			tmpxorNode = tmpxorNode->list_next;
1063		}
1064		tmpreadParityNode = tmpreadParityNode->list_next;
1065	}
1066
1067#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1068	/* connect read old q nodes to q nodes */
1069	if (nfaults == 2) {
1070		tmpreadParityNode = readParityNodes;
1071		tmpreadQNode = readQNodes;
1072		for (i = 0; i < numParityNodes; i++) {
1073			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1074			tmpqNode = qNodes;
1075			for (j = 0; j < numParityNodes; j++) {
1076				tmpreadQNode->succedents[j] = tmpqNode;
1077				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNode;
1078				tmpqNode->antType[numDataNodes + i] = rf_trueData;
1079				tmpqNode = tmpqNode->list_next;
1080			}
1081			tmpreadParityNode = tmpreadParityNode->list_next;
1082			tmpreadQNode = tmpreadQNode->list_next;
1083		}
1084	}
1085#endif
1086	/* connect xor nodes to commit node */
1087	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1088	tmpxorNode = xorNodes;
1089	for (i = 0; i < numParityNodes; i++) {
1090		RF_ASSERT(tmpxorNode->numSuccedents == 1);
1091		tmpxorNode->succedents[0] = commitNode;
1092		commitNode->antecedents[i] = tmpxorNode;
1093		commitNode->antType[i] = rf_control;
1094		tmpxorNode = tmpxorNode->list_next;
1095	}
1096
1097#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1098	/* connect q nodes to commit node */
1099	if (nfaults == 2) {
1100		tmpqNode = qNodes;
1101		for (i = 0; i < numParityNodes; i++) {
1102			RF_ASSERT(tmpqNode->numSuccedents == 1);
1103			tmpqNode->succedents[0] = commitNode;
1104			commitNode->antecedents[i + numParityNodes] = tmpqNode;
1105			commitNode->antType[i + numParityNodes] = rf_control;
1106			tmpqNode = tmpqNode->list_next;
1107		}
1108	}
1109#endif
1110	/* connect commit node to write nodes */
1111	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1112	tmpwriteDataNode = writeDataNodes;
1113	for (i = 0; i < numDataNodes; i++) {
1114		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1115		commitNode->succedents[i] = tmpwriteDataNode;
1116		tmpwriteDataNode->antecedents[0] = commitNode;
1117		tmpwriteDataNode->antType[0] = rf_trueData;
1118		tmpwriteDataNode = tmpwriteDataNode->list_next;
1119	}
1120	tmpwriteParityNode = writeParityNodes;
1121	for (i = 0; i < numParityNodes; i++) {
1122		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1123		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1124		tmpwriteParityNode->antecedents[0] = commitNode;
1125		tmpwriteParityNode->antType[0] = rf_trueData;
1126		tmpwriteParityNode = tmpwriteParityNode->list_next;
1127	}
1128#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1129	if (nfaults == 2) {
1130		tmpwriteQNode = writeQNodes;
1131		for (i = 0; i < numParityNodes; i++) {
1132			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1133			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1134			tmpwriteQNode->antecedents[0] = commitNode;
1135			tmpwriteQNode->antType[0] = rf_trueData;
1136			tmpwriteQNode = tmpwriteQNode->list_next;
1137		}
1138	}
1139#endif
1140	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1141	RF_ASSERT(termNode->numSuccedents == 0);
1142	tmpwriteDataNode = writeDataNodes;
1143	for (i = 0; i < numDataNodes; i++) {
1144		/* connect write new data nodes to term node */
1145		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1146		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1147		tmpwriteDataNode->succedents[0] = termNode;
1148		termNode->antecedents[i] = tmpwriteDataNode;
1149		termNode->antType[i] = rf_control;
1150		tmpwriteDataNode = tmpwriteDataNode->list_next;
1151	}
1152
1153	tmpwriteParityNode = writeParityNodes;
1154	for (i = 0; i < numParityNodes; i++) {
1155		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1156		tmpwriteParityNode->succedents[0] = termNode;
1157		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1158		termNode->antType[numDataNodes + i] = rf_control;
1159		tmpwriteParityNode = tmpwriteParityNode->list_next;
1160	}
1161
1162#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1163	if (nfaults == 2) {
1164		tmpwriteQNode = writeQNodes;
1165		for (i = 0; i < numParityNodes; i++) {
1166			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1167			tmpwriteQNode->succedents[0] = termNode;
1168			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1169			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1170			tmpwriteQNode = tmpwriteQNode->list_next;
1171		}
1172	}
1173#endif
1174}
1175
1176
1177/******************************************************************************
1178 * create a write graph (fault-free or degraded) for RAID level 1
1179 *
1180 * Hdr -> Commit -> Wpd -> Nil -> Trm
1181 *               -> Wsd ->
1182 *
1183 * The "Wpd" node writes data to the primary copy in the mirror pair
1184 * The "Wsd" node writes data to the secondary copy in the mirror pair
1185 *
1186 * Parameters:  raidPtr   - description of the physical array
1187 *              asmap     - logical & physical addresses for this access
1188 *              bp        - buffer ptr (holds write data)
1189 *              flags     - general flags (e.g. disk locking)
1190 *              allocList - list of memory allocated in DAG creation
1191 *****************************************************************************/
1192
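/*
 * Mirroring sketch (illustrative, not compiled; "wpdNode" and "wsdNode"
 * are placeholders for the Wpd/Wsd nodes built below): the primary and
 * secondary writes share the same data buffer and differ only in the
 * physical address they target.  The secondary copy is described by
 * asmap->parityInfo.
 */
#if 0
	wpdNode->params[0].p = pda;		/* primary copy address */
	wpdNode->params[1].p = pda->bufPtr;	/* caller's write data */
	wsdNode->params[0].p = pdaP;		/* mirror copy address */
	wsdNode->params[1].p = pda->bufPtr;	/* same data buffer */
#endif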
1193void
1194rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1195			 RF_DagHeader_t *dag_h, void *bp,
1196			 RF_RaidAccessFlags_t flags,
1197			 RF_AllocListElem_t *allocList)
1198{
1199	RF_DagNode_t *unblockNode, *termNode, *commitNode;
1200	RF_DagNode_t *wndNode, *wmirNode;
1201	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1202	int     nWndNodes, nWmirNodes, i;
1203	RF_ReconUnitNum_t which_ru;
1204	RF_PhysDiskAddr_t *pda, *pdaP;
1205	RF_StripeNum_t parityStripeID;
1206
1207	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1208	    asmap->raidAddress, &which_ru);
1209#if RF_DEBUG_DAG
1210	if (rf_dagDebug) {
1211		printf("[Creating RAID level 1 write DAG]\n");
1212	}
1213#endif
1214	dag_h->creator = "RaidOneWriteDAG";
1215
1216	/* 2 implies access not SU aligned */
1217	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1218	nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1219
1220	/* alloc the Wnd nodes and the Wmir node */
1221	if (asmap->numDataFailed == 1)
1222		nWndNodes--;
1223	if (asmap->numParityFailed == 1)
1224		nWmirNodes--;
1225
1226	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1227	 * + terminator) */
1228	for (i = 0; i < nWndNodes; i++) {
1229		tmpNode = rf_AllocDAGNode();
1230		tmpNode->list_next = dag_h->nodes;
1231		dag_h->nodes = tmpNode;
1232	}
1233	wndNode = dag_h->nodes;
1234
1235	for (i = 0; i < nWmirNodes; i++) {
1236		tmpNode = rf_AllocDAGNode();
1237		tmpNode->list_next = dag_h->nodes;
1238		dag_h->nodes = tmpNode;
1239	}
1240	wmirNode = dag_h->nodes;
1241
1242	commitNode = rf_AllocDAGNode();
1243	commitNode->list_next = dag_h->nodes;
1244	dag_h->nodes = commitNode;
1245
1246	unblockNode = rf_AllocDAGNode();
1247	unblockNode->list_next = dag_h->nodes;
1248	dag_h->nodes = unblockNode;
1249
1250	termNode = rf_AllocDAGNode();
1251	termNode->list_next = dag_h->nodes;
1252	dag_h->nodes = termNode;
1253
1254	/* this dag can commit immediately */
1255	dag_h->numCommitNodes = 1;
1256	dag_h->numCommits = 0;
1257	dag_h->numSuccedents = 1;
1258
1259	/* initialize the commit, unblock, and term nodes */
1260	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1261		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1262		    0, 0, 0, dag_h, "Cmt", allocList);
1263	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1264		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1265		    0, 0, dag_h, "Nil", allocList);
1266	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1267		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1268		    dag_h, "Trm", allocList);
1269
1270	/* initialize the wnd nodes */
1271	if (nWndNodes > 0) {
1272		pda = asmap->physInfo;
1273		tmpwndNode = wndNode;
1274		for (i = 0; i < nWndNodes; i++) {
1275			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1276				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1277				    rf_GenericWakeupFunc, 1, 1, 4, 0,
1278				    dag_h, "Wpd", allocList);
1279			RF_ASSERT(pda != NULL);
1280			tmpwndNode->params[0].p = pda;
1281			tmpwndNode->params[1].p = pda->bufPtr;
1282			tmpwndNode->params[2].v = parityStripeID;
1283			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1284			pda = pda->next;
1285			tmpwndNode = tmpwndNode->list_next;
1286		}
1287		RF_ASSERT(pda == NULL);
1288	}
1289	/* initialize the mirror nodes */
1290	if (nWmirNodes > 0) {
1291		pda = asmap->physInfo;
1292		pdaP = asmap->parityInfo;
1293		tmpwmirNode = wmirNode;
1294		for (i = 0; i < nWmirNodes; i++) {
1295			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1296				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1297				    rf_GenericWakeupFunc, 1, 1, 4, 0,
1298				    dag_h, "Wsd", allocList);
1299			RF_ASSERT(pda != NULL);
1300			tmpwmirNode->params[0].p = pdaP;
1301			tmpwmirNode->params[1].p = pda->bufPtr;
1302			tmpwmirNode->params[2].v = parityStripeID;
1303			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1304			pda = pda->next;
1305			pdaP = pdaP->next;
1306			tmpwmirNode = tmpwmirNode->list_next;
1307		}
1308		RF_ASSERT(pda == NULL);
1309		RF_ASSERT(pdaP == NULL);
1310	}
1311	/* link the header node to the commit node */
1312	RF_ASSERT(dag_h->numSuccedents == 1);
1313	RF_ASSERT(commitNode->numAntecedents == 0);
1314	dag_h->succedents[0] = commitNode;
1315
1316	/* link the commit node to the write nodes */
1317	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1318	tmpwndNode = wndNode;
1319	for (i = 0; i < nWndNodes; i++) {
1320		RF_ASSERT(tmpwndNode->numAntecedents == 1);
1321		commitNode->succedents[i] = tmpwndNode;
1322		tmpwndNode->antecedents[0] = commitNode;
1323		tmpwndNode->antType[0] = rf_control;
1324		tmpwndNode = tmpwndNode->list_next;
1325	}
1326	tmpwmirNode = wmirNode;
1327	for (i = 0; i < nWmirNodes; i++) {
1328		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1329		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1330		tmpwmirNode->antecedents[0] = commitNode;
1331		tmpwmirNode->antType[0] = rf_control;
1332		tmpwmirNode = tmpwmirNode->list_next;
1333	}
1334
1335	/* link the write nodes to the unblock node */
1336	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1337	tmpwndNode = wndNode;
1338	for (i = 0; i < nWndNodes; i++) {
1339		RF_ASSERT(tmpwndNode->numSuccedents == 1);
1340		tmpwndNode->succedents[0] = unblockNode;
1341		unblockNode->antecedents[i] = tmpwndNode;
1342		unblockNode->antType[i] = rf_control;
1343		tmpwndNode = tmpwndNode->list_next;
1344	}
1345	tmpwmirNode = wmirNode;
1346	for (i = 0; i < nWmirNodes; i++) {
1347		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1348		tmpwmirNode->succedents[0] = unblockNode;
1349		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1350		unblockNode->antType[i + nWndNodes] = rf_control;
1351		tmpwmirNode = tmpwmirNode->list_next;
1352	}
1353
1354	/* link the unblock node to the term node */
1355	RF_ASSERT(unblockNode->numSuccedents == 1);
1356	RF_ASSERT(termNode->numAntecedents == 1);
1357	RF_ASSERT(termNode->numSuccedents == 0);
1358	unblockNode->succedents[0] = termNode;
1359	termNode->antecedents[0] = unblockNode;
1360	termNode->antType[0] = rf_control;
1361}
1362