/* rf_dagffwr.c revision 1.34 */
/*	$NetBSD: rf_dagffwr.c,v 1.34 2013/09/15 12:41:17 martin Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */
28
/*
 * rf_dagffwr.c
 *
 * code for creating fault-free (write) DAGs
 *
 */
35
36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.34 2013/09/15 12:41:17 martin Exp $");
38
39#include <dev/raidframe/raidframevar.h>
40
41#include "rf_raid.h"
42#include "rf_dag.h"
43#include "rf_dagutils.h"
44#include "rf_dagfuncs.h"
45#include "rf_debugMem.h"
46#include "rf_dagffrd.h"
47#include "rf_general.h"
48#include "rf_dagffwr.h"
49#include "rf_map.h"
50
/******************************************************************************
 *
 * General comments on DAG creation:
 *
 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
 * makes no changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */
68
69
/******************************************************************************
 *
 * The following wrappers map the standard DAG creation interface to the
 * DAG creation routines.  Additionally, these wrappers enable experimentation
 * with new DAG structures by providing an extra level of indirection, allowing
 * the DAG creation routines to be replaced at this single point.
 */
78
79void
80rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81			      RF_DagHeader_t *dag_h, void *bp,
82			      RF_RaidAccessFlags_t flags,
83			      RF_AllocListElem_t *allocList,
84			      RF_IoType_t type)
85{
86	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87				 RF_IO_TYPE_WRITE);
88}
89
90void
91rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92		       RF_DagHeader_t *dag_h, void *bp,
93		       RF_RaidAccessFlags_t flags,
94		       RF_AllocListElem_t *allocList,
95		       RF_IoType_t type)
96{
97	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98				 RF_IO_TYPE_WRITE);
99}
100
101void
102rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103		       RF_DagHeader_t *dag_h, void *bp,
104		       RF_RaidAccessFlags_t flags,
105		       RF_AllocListElem_t *allocList)
106{
107	/* "normal" rollaway */
108	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109				     allocList, &rf_xorFuncs, NULL);
110}
111
112void
113rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114		       RF_DagHeader_t *dag_h, void *bp,
115		       RF_RaidAccessFlags_t flags,
116		       RF_AllocListElem_t *allocList)
117{
118	/* "normal" rollaway */
119	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
121}
122
123
/******************************************************************************
 *
 * DAG creation code begins here
 */
128
129
/******************************************************************************
 *
 * creates a DAG to perform a large-write operation:
 *
 *           / Rod \           / Wnd \
 * H -- block- Rod - Xor - Cmt - Wnd --- T
 *           \ Rod /          \  Wnp /
 *                             \[Wnq]/
 *
 * The XOR node also does the Q calculation in the P+Q architecture.
 * All nodes before the commit node (Cmt) are assumed to be atomic and
 * undoable - or - they make no changes to permanent state.
 *
 * Rod = read old data
 * Cmt = commit node
 * Wnp = write new parity
 * Wnd = write new data
 * Wnq = write new "q"
 * [] denotes optional segments in the graph
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *              nfaults   - number of faults array can tolerate
 *                          (equal to # redundancy units in stripe)
 *              redfuncs  - list of redundancy generating functions
 *
 *****************************************************************************/
160
161void
162rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
163			     RF_DagHeader_t *dag_h, void *bp,
164			     RF_RaidAccessFlags_t flags,
165			     RF_AllocListElem_t *allocList,
166			     int nfaults, int (*redFunc) (RF_DagNode_t *),
167			     int allowBufferRecycle)
168{
169	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
170	RF_DagNode_t *blockNode, *commitNode, *termNode;
171#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
172	RF_DagNode_t *wnqNode;
173#endif
174	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
175	RF_AccessStripeMapHeader_t *new_asm_h[2];
176	RF_StripeNum_t parityStripeID;
177	char   *sosBuffer, *eosBuffer;
178	RF_ReconUnitNum_t which_ru;
179	RF_RaidLayout_t *layoutPtr;
180	RF_PhysDiskAddr_t *pda;
181
182	layoutPtr = &(raidPtr->Layout);
183	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
184							asmap->raidAddress,
185							&which_ru);
186
187#if RF_DEBUG_DAG
188	if (rf_dagDebug) {
189		printf("[Creating large-write DAG]\n");
190	}
191#endif
192	dag_h->creator = "LargeWriteDAG";
193
194	dag_h->numCommitNodes = 1;
195	dag_h->numCommits = 0;
196	dag_h->numSuccedents = 1;
197
198	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
199	nWndNodes = asmap->numStripeUnitsAccessed;
200
201	for (i = 0; i < nWndNodes; i++) {
202		tmpNode = rf_AllocDAGNode();
203		tmpNode->list_next = dag_h->nodes;
204		dag_h->nodes = tmpNode;
205	}
206	wndNodes = dag_h->nodes;
207
208	xorNode = rf_AllocDAGNode();
209	xorNode->list_next = dag_h->nodes;
210	dag_h->nodes = xorNode;
211
212	wnpNode = rf_AllocDAGNode();
213	wnpNode->list_next = dag_h->nodes;
214	dag_h->nodes = wnpNode;
215
216	blockNode = rf_AllocDAGNode();
217	blockNode->list_next = dag_h->nodes;
218	dag_h->nodes = blockNode;
219
220	commitNode = rf_AllocDAGNode();
221	commitNode->list_next = dag_h->nodes;
222	dag_h->nodes = commitNode;
223
224	termNode = rf_AllocDAGNode();
225	termNode->list_next = dag_h->nodes;
226	dag_h->nodes = termNode;
227
228#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
229	if (nfaults == 2) {
230		wnqNode = rf_AllocDAGNode();
231	} else {
232		wnqNode = NULL;
233	}
234#endif
235	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
236					new_asm_h, &nRodNodes, &sosBuffer,
237					&eosBuffer, allocList);
238	if (nRodNodes > 0) {
239		for (i = 0; i < nRodNodes; i++) {
240			tmpNode = rf_AllocDAGNode();
241			tmpNode->list_next = dag_h->nodes;
242			dag_h->nodes = tmpNode;
243		}
244		rodNodes = dag_h->nodes;
245	} else {
246		rodNodes = NULL;
247	}
248
249	/* begin node initialization */
250	if (nRodNodes > 0) {
251		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
252			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
253			    dag_h, "Nil", allocList);
254	} else {
255		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
256			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
257			    dag_h, "Nil", allocList);
258	}
259
260	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
261		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
262		    dag_h, "Cmt", allocList);
263	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
264		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
265		    dag_h, "Trm", allocList);
266
267	/* initialize the Rod nodes */
268	tmpNode = rodNodes;
269	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
270		if (new_asm_h[asmNum]) {
271			pda = new_asm_h[asmNum]->stripeMap->physInfo;
272			while (pda) {
273				rf_InitNode(tmpNode, rf_wait,
274					    RF_FALSE, rf_DiskReadFunc,
275					    rf_DiskReadUndoFunc,
276					    rf_GenericWakeupFunc,
277					    1, 1, 4, 0, dag_h,
278					    "Rod", allocList);
279				tmpNode->params[0].p = pda;
280				tmpNode->params[1].p = pda->bufPtr;
281				tmpNode->params[2].v = parityStripeID;
282				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
283				    which_ru);
284				nodeNum++;
285				pda = pda->next;
286				tmpNode = tmpNode->list_next;
287			}
288		}
289	}
290	RF_ASSERT(nodeNum == nRodNodes);
291
292	/* initialize the wnd nodes */
293	pda = asmap->physInfo;
294	tmpNode = wndNodes;
295	for (i = 0; i < nWndNodes; i++) {
296		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
297			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
298			    rf_GenericWakeupFunc, 1, 1, 4, 0,
299			    dag_h, "Wnd", allocList);
300		RF_ASSERT(pda != NULL);
301		tmpNode->params[0].p = pda;
302		tmpNode->params[1].p = pda->bufPtr;
303		tmpNode->params[2].v = parityStripeID;
304		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
305		pda = pda->next;
306		tmpNode = tmpNode->list_next;
307	}
308
309	/* initialize the redundancy node */
310	if (nRodNodes > 0) {
311		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
312			    rf_NullNodeUndoFunc, NULL, 1,
313			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
314			    nfaults, dag_h, "Xr ", allocList);
315	} else {
316		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
317			    rf_NullNodeUndoFunc, NULL, 1,
318			    1, 2 * (nWndNodes + nRodNodes) + 1,
319			    nfaults, dag_h, "Xr ", allocList);
320	}
321	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
322	tmpNode = wndNodes;
323	for (i = 0; i < nWndNodes; i++) {
324		/* pda */
325		xorNode->params[2 * i + 0] = tmpNode->params[0];
326		/* buf ptr */
327		xorNode->params[2 * i + 1] = tmpNode->params[1];
328		tmpNode = tmpNode->list_next;
329	}
330	tmpNode = rodNodes;
331	for (i = 0; i < nRodNodes; i++) {
332		/* pda */
333		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
334		/* buf ptr */
335		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
336		tmpNode = tmpNode->list_next;
337	}
338	/* xor node needs to get at RAID information */
339	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
340
341	/*
342         * Look for an Rod node that reads a complete SU. If none,
343         * alloc a buffer to receive the parity info. Note that we
344         * can't use a new data buffer because it will not have gotten
345         * written when the xor occurs.  */
346	if (allowBufferRecycle) {
347		tmpNode = rodNodes;
348		for (i = 0; i < nRodNodes; i++) {
349			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
350				break;
351			tmpNode = tmpNode->list_next;
352		}
353	}
354	if ((!allowBufferRecycle) || (i == nRodNodes)) {
355		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
356	} else {
357		/* this works because the only way we get here is if
358		   allowBufferRecycle is true and we went through the
359		   above for loop, and exited via the break before
360		   i==nRodNodes was true.  That means tmpNode will
361		   still point to a valid node -- the one we want for
362		   here! */
363		xorNode->results[0] = tmpNode->params[1].p;
364	}
365
366	/* initialize the Wnp node */
367	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
368		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
369		    dag_h, "Wnp", allocList);
370	wnpNode->params[0].p = asmap->parityInfo;
371	wnpNode->params[1].p = xorNode->results[0];
372	wnpNode->params[2].v = parityStripeID;
373	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
374	/* parityInfo must describe entire parity unit */
375	RF_ASSERT(asmap->parityInfo->next == NULL);
376
377#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
378	if (nfaults == 2) {
379		/*
380	         * We never try to recycle a buffer for the Q calcuation
381	         * in addition to the parity. This would cause two buffers
382	         * to get smashed during the P and Q calculation, guaranteeing
383	         * one would be wrong.
384	         */
385		RF_MallocAndAdd(xorNode->results[1],
386				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
387				(void *), allocList);
388		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
389			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
390			    1, 1, 4, 0, dag_h, "Wnq", allocList);
391		wnqNode->params[0].p = asmap->qInfo;
392		wnqNode->params[1].p = xorNode->results[1];
393		wnqNode->params[2].v = parityStripeID;
394		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
395		/* parityInfo must describe entire parity unit */
396		RF_ASSERT(asmap->parityInfo->next == NULL);
397	}
398#endif
399	/*
400         * Connect nodes to form graph.
401         */
402
403	/* connect dag header to block node */
404	RF_ASSERT(blockNode->numAntecedents == 0);
405	dag_h->succedents[0] = blockNode;
406
407	if (nRodNodes > 0) {
408		/* connect the block node to the Rod nodes */
409		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
410		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
411		tmpNode = rodNodes;
412		for (i = 0; i < nRodNodes; i++) {
413			RF_ASSERT(tmpNode->numAntecedents == 1);
414			blockNode->succedents[i] = tmpNode;
415			tmpNode->antecedents[0] = blockNode;
416			tmpNode->antType[0] = rf_control;
417
418			/* connect the Rod nodes to the Xor node */
419			RF_ASSERT(tmpNode->numSuccedents == 1);
420			tmpNode->succedents[0] = xorNode;
421			xorNode->antecedents[i] = tmpNode;
422			xorNode->antType[i] = rf_trueData;
423			tmpNode = tmpNode->list_next;
424		}
425	} else {
426		/* connect the block node to the Xor node */
427		RF_ASSERT(blockNode->numSuccedents == 1);
428		RF_ASSERT(xorNode->numAntecedents == 1);
429		blockNode->succedents[0] = xorNode;
430		xorNode->antecedents[0] = blockNode;
431		xorNode->antType[0] = rf_control;
432	}
433
434	/* connect the xor node to the commit node */
435	RF_ASSERT(xorNode->numSuccedents == 1);
436	RF_ASSERT(commitNode->numAntecedents == 1);
437	xorNode->succedents[0] = commitNode;
438	commitNode->antecedents[0] = xorNode;
439	commitNode->antType[0] = rf_control;
440
441	/* connect the commit node to the write nodes */
442	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
443	tmpNode = wndNodes;
444	for (i = 0; i < nWndNodes; i++) {
445		RF_ASSERT(wndNodes->numAntecedents == 1);
446		commitNode->succedents[i] = tmpNode;
447		tmpNode->antecedents[0] = commitNode;
448		tmpNode->antType[0] = rf_control;
449		tmpNode = tmpNode->list_next;
450	}
451	RF_ASSERT(wnpNode->numAntecedents == 1);
452	commitNode->succedents[nWndNodes] = wnpNode;
453	wnpNode->antecedents[0] = commitNode;
454	wnpNode->antType[0] = rf_trueData;
455#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
456	if (nfaults == 2) {
457		RF_ASSERT(wnqNode->numAntecedents == 1);
458		commitNode->succedents[nWndNodes + 1] = wnqNode;
459		wnqNode->antecedents[0] = commitNode;
460		wnqNode->antType[0] = rf_trueData;
461	}
462#endif
463	/* connect the write nodes to the term node */
464	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
465	RF_ASSERT(termNode->numSuccedents == 0);
466	tmpNode = wndNodes;
467	for (i = 0; i < nWndNodes; i++) {
468		RF_ASSERT(wndNodes->numSuccedents == 1);
469		tmpNode->succedents[0] = termNode;
470		termNode->antecedents[i] = tmpNode;
471		termNode->antType[i] = rf_control;
472		tmpNode = tmpNode->list_next;
473	}
474	RF_ASSERT(wnpNode->numSuccedents == 1);
475	wnpNode->succedents[0] = termNode;
476	termNode->antecedents[nWndNodes] = wnpNode;
477	termNode->antType[nWndNodes] = rf_control;
478#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
479	if (nfaults == 2) {
480		RF_ASSERT(wnqNode->numSuccedents == 1);
481		wnqNode->succedents[0] = termNode;
482		termNode->antecedents[nWndNodes + 1] = wnqNode;
483		termNode->antType[nWndNodes + 1] = rf_control;
484	}
485#endif
486}
/******************************************************************************
 *
 * creates a DAG to perform a small-write operation (either raid 5 or pq),
 * which is as follows:
 *
 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
 *            \- Rod X      /     \----> Wnd [Und]-/
 *           [\- Rod X     /       \---> Wnd [Und]-/]
 *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
 *
 * Rop = read old parity
 * Rod = read old data
 * Roq = read old "q"
 * Cmt = commit node
 * Und = unlock data disk
 * Unp = unlock parity disk
 * Unq = unlock q disk
 * Wnp = write new parity
 * Wnd = write new data
 * Wnq = write new "q"
 * [ ] denotes optional segments in the graph
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *              pfuncs    - list of parity generating functions
 *              qfuncs    - list of q generating functions
 *
 * A null qfuncs indicates single fault tolerant
 *****************************************************************************/
519
520void
521rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
522			     RF_DagHeader_t *dag_h, void *bp,
523			     RF_RaidAccessFlags_t flags,
524			     RF_AllocListElem_t *allocList,
525			     const RF_RedFuncs_t *pfuncs,
526			     const RF_RedFuncs_t *qfuncs)
527{
528	RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
529	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
530	RF_DagNode_t *xorNodes, *blockNode, *commitNode;
531	RF_DagNode_t *writeDataNodes, *writeParityNodes;
532	RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
533	RF_DagNode_t *tmpwriteParityNode;
534#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
535	RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
536	     *writeQNodes, *qNodes;
537#endif
538	int     i, j, nNodes;
539	RF_ReconUnitNum_t which_ru;
540	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
541	int     (*qfunc) (RF_DagNode_t *) __unused;
542	int     numDataNodes, numParityNodes;
543	RF_StripeNum_t parityStripeID;
544	RF_PhysDiskAddr_t *pda;
545	const char *name, *qname __unused;
546	long    nfaults;
547
548	nfaults = qfuncs ? 2 : 1;
549
550	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
551	    asmap->raidAddress, &which_ru);
552	pda = asmap->physInfo;
553	numDataNodes = asmap->numStripeUnitsAccessed;
554	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
555
556#if RF_DEBUG_DAG
557	if (rf_dagDebug) {
558		printf("[Creating small-write DAG]\n");
559	}
560#endif
561	RF_ASSERT(numDataNodes > 0);
562	dag_h->creator = "SmallWriteDAG";
563
564	dag_h->numCommitNodes = 1;
565	dag_h->numCommits = 0;
566	dag_h->numSuccedents = 1;
567
568	/*
569         * DAG creation occurs in four steps:
570         * 1. count the number of nodes in the DAG
571         * 2. create the nodes
572         * 3. initialize the nodes
573         * 4. connect the nodes
574         */
575
576	/*
577         * Step 1. compute number of nodes in the graph
578         */
579
580	/* number of nodes: a read and write for each data unit a
581	 * redundancy computation node for each parity node (nfaults *
582	 * nparity) a read and write for each parity unit a block and
583	 * commit node (2) a terminate node if atomic RMW an unlock
584	 * node for each data unit, redundancy unit
585	 * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
586	 *   + (nfaults * 2 * numParityNodes) + 3;
587	 */
588
589	/*
590         * Step 2. create the nodes
591         */
592
593	blockNode = rf_AllocDAGNode();
594	blockNode->list_next = dag_h->nodes;
595	dag_h->nodes = blockNode;
596
597	commitNode = rf_AllocDAGNode();
598	commitNode->list_next = dag_h->nodes;
599	dag_h->nodes = commitNode;
600
601	for (i = 0; i < numDataNodes; i++) {
602		tmpNode = rf_AllocDAGNode();
603		tmpNode->list_next = dag_h->nodes;
604		dag_h->nodes = tmpNode;
605	}
606	readDataNodes = dag_h->nodes;
607
608	for (i = 0; i < numParityNodes; i++) {
609		tmpNode = rf_AllocDAGNode();
610		tmpNode->list_next = dag_h->nodes;
611		dag_h->nodes = tmpNode;
612	}
613	readParityNodes = dag_h->nodes;
614
615	for (i = 0; i < numDataNodes; i++) {
616		tmpNode = rf_AllocDAGNode();
617		tmpNode->list_next = dag_h->nodes;
618		dag_h->nodes = tmpNode;
619	}
620	writeDataNodes = dag_h->nodes;
621
622	for (i = 0; i < numParityNodes; i++) {
623		tmpNode = rf_AllocDAGNode();
624		tmpNode->list_next = dag_h->nodes;
625		dag_h->nodes = tmpNode;
626	}
627	writeParityNodes = dag_h->nodes;
628
629	for (i = 0; i < numParityNodes; i++) {
630		tmpNode = rf_AllocDAGNode();
631		tmpNode->list_next = dag_h->nodes;
632		dag_h->nodes = tmpNode;
633	}
634	xorNodes = dag_h->nodes;
635
636	termNode = rf_AllocDAGNode();
637	termNode->list_next = dag_h->nodes;
638	dag_h->nodes = termNode;
639
640#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
641	if (nfaults == 2) {
642		for (i = 0; i < numParityNodes; i++) {
643			tmpNode = rf_AllocDAGNode();
644			tmpNode->list_next = dag_h->nodes;
645			dag_h->nodes = tmpNode;
646		}
647		readQNodes = dag_h->nodes;
648
649		for (i = 0; i < numParityNodes; i++) {
650			tmpNode = rf_AllocDAGNode();
651			tmpNode->list_next = dag_h->nodes;
652			dag_h->nodes = tmpNode;
653		}
654		writeQNodes = dag_h->nodes;
655
656		for (i = 0; i < numParityNodes; i++) {
657			tmpNode = rf_AllocDAGNode();
658			tmpNode->list_next = dag_h->nodes;
659			dag_h->nodes = tmpNode;
660		}
661		qNodes = dag_h->nodes;
662	} else {
663		readQNodes = writeQNodes = qNodes = NULL;
664	}
665#endif
666
667	/*
668         * Step 3. initialize the nodes
669         */
670	/* initialize block node (Nil) */
671	nNodes = numDataNodes + (nfaults * numParityNodes);
672	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
673		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
674		    dag_h, "Nil", allocList);
675
676	/* initialize commit node (Cmt) */
677	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
678		    rf_NullNodeUndoFunc, NULL, nNodes,
679		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
680
681	/* initialize terminate node (Trm) */
682	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
683		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
684		    dag_h, "Trm", allocList);
685
686	/* initialize nodes which read old data (Rod) */
687	tmpreadDataNode = readDataNodes;
688	for (i = 0; i < numDataNodes; i++) {
689		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
690			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
691			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
692			    1, 4, 0, dag_h, "Rod", allocList);
693		RF_ASSERT(pda != NULL);
694		/* physical disk addr desc */
695		tmpreadDataNode->params[0].p = pda;
696		/* buffer to hold old data */
697		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
698		tmpreadDataNode->params[2].v = parityStripeID;
699		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
700		    which_ru);
701		pda = pda->next;
702		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
703			tmpreadDataNode->propList[j] = NULL;
704		}
705		tmpreadDataNode = tmpreadDataNode->list_next;
706	}
707
708	/* initialize nodes which read old parity (Rop) */
709	pda = asmap->parityInfo;
710	i = 0;
711	tmpreadParityNode = readParityNodes;
712	for (i = 0; i < numParityNodes; i++) {
713		RF_ASSERT(pda != NULL);
714		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
715			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
716			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
717			    dag_h, "Rop", allocList);
718		tmpreadParityNode->params[0].p = pda;
719		/* buffer to hold old parity */
720		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
721		tmpreadParityNode->params[2].v = parityStripeID;
722		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
723		    which_ru);
724		pda = pda->next;
725		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
726			tmpreadParityNode->propList[0] = NULL;
727		}
728		tmpreadParityNode = tmpreadParityNode->list_next;
729	}
730
731#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
732	/* initialize nodes which read old Q (Roq) */
733	if (nfaults == 2) {
734		pda = asmap->qInfo;
735		tmpreadQNode = readQNodes;
736		for (i = 0; i < numParityNodes; i++) {
737			RF_ASSERT(pda != NULL);
738			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
739				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
740				    rf_GenericWakeupFunc, numParityNodes,
741				    1, 4, 0, dag_h, "Roq", allocList);
742			tmpreadQNode->params[0].p = pda;
743			/* buffer to hold old Q */
744			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
745								   pda->numSector << raidPtr->logBytesPerSector);
746			tmpreadQNode->params[2].v = parityStripeID;
747			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
748			    which_ru);
749			pda = pda->next;
750			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
751				tmpreadQNode->propList[0] = NULL;
752			}
753			tmpreadQNode = tmpreadQNode->list_next;
754		}
755	}
756#endif
757	/* initialize nodes which write new data (Wnd) */
758	pda = asmap->physInfo;
759	tmpwriteDataNode = writeDataNodes;
760	for (i = 0; i < numDataNodes; i++) {
761		RF_ASSERT(pda != NULL);
762		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
763			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
764			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
765			    "Wnd", allocList);
766		/* physical disk addr desc */
767		tmpwriteDataNode->params[0].p = pda;
768		/* buffer holding new data to be written */
769		tmpwriteDataNode->params[1].p = pda->bufPtr;
770		tmpwriteDataNode->params[2].v = parityStripeID;
771		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
772		    which_ru);
773		pda = pda->next;
774		tmpwriteDataNode = tmpwriteDataNode->list_next;
775	}
776
777	/*
778         * Initialize nodes which compute new parity and Q.
779         */
780	/*
781         * We use the simple XOR func in the double-XOR case, and when
782         * we're accessing only a portion of one stripe unit. The
783         * distinction between the two is that the regular XOR func
784         * assumes that the targbuf is a full SU in size, and examines
785         * the pda associated with the buffer to decide where within
786         * the buffer to XOR the data, whereas the simple XOR func
787         * just XORs the data into the start of the buffer.  */
788	if ((numParityNodes == 2) || ((numDataNodes == 1)
789		&& (asmap->totalSectorsAccessed <
790		    raidPtr->Layout.sectorsPerStripeUnit))) {
791		func = pfuncs->simple;
792		undoFunc = rf_NullNodeUndoFunc;
793		name = pfuncs->SimpleName;
794		if (qfuncs) {
795			qfunc = qfuncs->simple;
796			qname = qfuncs->SimpleName;
797		} else {
798			qfunc = NULL;
799			qname = NULL;
800		}
801	} else {
802		func = pfuncs->regular;
803		undoFunc = rf_NullNodeUndoFunc;
804		name = pfuncs->RegularName;
805		if (qfuncs) {
806			qfunc = qfuncs->regular;
807			qname = qfuncs->RegularName;
808		} else {
809			qfunc = NULL;
810			qname = NULL;
811		}
812	}
813	/*
814         * Initialize the xor nodes: params are {pda,buf}
815         * from {Rod,Wnd,Rop} nodes, and raidPtr
816         */
817	if (numParityNodes == 2) {
818		/* double-xor case */
819		tmpxorNode = xorNodes;
820		tmpreadDataNode = readDataNodes;
821		tmpreadParityNode = readParityNodes;
822		tmpwriteDataNode = writeDataNodes;
823#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
824		tmpqNode = qNodes;
825		tmpreadQNode = readQNodes;
826#endif
827		for (i = 0; i < numParityNodes; i++) {
828			/* note: no wakeup func for xor */
829			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
830				    undoFunc, NULL, 1,
831				    (numDataNodes + numParityNodes),
832				    7, 1, dag_h, name, allocList);
833			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
834			tmpxorNode->params[0] = tmpreadDataNode->params[0];
835			tmpxorNode->params[1] = tmpreadDataNode->params[1];
836			tmpxorNode->params[2] = tmpreadParityNode->params[0];
837			tmpxorNode->params[3] = tmpreadParityNode->params[1];
838			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
839			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
840			tmpxorNode->params[6].p = raidPtr;
841			/* use old parity buf as target buf */
842			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
843#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
844			if (nfaults == 2) {
845				/* note: no wakeup func for qor */
846				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
847					    qfunc, undoFunc, NULL, 1,
848					    (numDataNodes + numParityNodes),
849					    7, 1, dag_h, qname, allocList);
850				tmpqNode->params[0] = tmpreadDataNode->params[0];
851				tmpqNode->params[1] = tmpreadDataNode->params[1];
852				tmpqNode->params[2] = tmpreadQNode->.params[0];
853				tmpqNode->params[3] = tmpreadQNode->params[1];
854				tmpqNode->params[4] = tmpwriteDataNode->params[0];
855				tmpqNode->params[5] = tmpwriteDataNode->params[1];
856				tmpqNode->params[6].p = raidPtr;
857				/* use old Q buf as target buf */
858				tmpqNode->results[0] = tmpreadQNode->params[1].p;
859				tmpqNode = tmpqNode->list_next;
860				tmpreadQNodes = tmpreadQNodes->list_next;
861			}
862#endif
863			tmpxorNode = tmpxorNode->list_next;
864			tmpreadDataNode = tmpreadDataNode->list_next;
865			tmpreadParityNode = tmpreadParityNode->list_next;
866			tmpwriteDataNode = tmpwriteDataNode->list_next;
867		}
868	} else {
869		/* there is only one xor node in this case */
870		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
871			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
872			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
873			    dag_h, name, allocList);
874		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
875		tmpreadDataNode = readDataNodes;
876		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
877							out the "+1" into the "deal with Rop separately below */
878			/* set up params related to Rod nodes */
879			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
880			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
881			tmpreadDataNode = tmpreadDataNode->list_next;
882		}
883		/* deal with Rop separately */
884		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
885		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
886
887		tmpwriteDataNode = writeDataNodes;
888		for (i = 0; i < numDataNodes; i++) {
889			/* set up params related to Wnd and Wnp nodes */
890			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
891			    tmpwriteDataNode->params[0];
892			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
893			    tmpwriteDataNode->params[1];
894			tmpwriteDataNode = tmpwriteDataNode->list_next;
895		}
896		/* xor node needs to get at RAID information */
897		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
898		xorNodes->results[0] = readParityNodes->params[1].p;
899#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
900		if (nfaults == 2) {
901			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
902				    undoFunc, NULL, 1,
903				    (numDataNodes + numParityNodes),
904				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
905				    dag_h, qname, allocList);
906			tmpreadDataNode = readDataNodes;
907			for (i = 0; i < numDataNodes; i++) {
908				/* set up params related to Rod */
909				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
910				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
911				tmpreadDataNode = tmpreadDataNode->list_next;
912			}
913			/* and read old q */
914			qNodes->params[2 * numDataNodes + 0] =	/* pda */
915			    readQNodes->params[0];
916			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
917			    readQNodes->params[1];
918			tmpwriteDataNode = writeDataNodes;
919			for (i = 0; i < numDataNodes; i++) {
920				/* set up params related to Wnd nodes */
921				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
922				    tmpwriteDataNode->params[0];
923				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
924				    tmpwriteDataNode->params[1];
925				tmpwriteDataNode = tmpwriteDataNode->list_next;
926			}
927			/* xor node needs to get at RAID information */
928			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
929			qNodes->results[0] = readQNodes->params[1].p;
930		}
931#endif
932	}
933
934	/* initialize nodes which write new parity (Wnp) */
935	pda = asmap->parityInfo;
936	tmpwriteParityNode = writeParityNodes;
937	tmpxorNode = xorNodes;
938	for (i = 0; i < numParityNodes; i++) {
939		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
940			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
941			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
942			    "Wnp", allocList);
943		RF_ASSERT(pda != NULL);
944		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
945				  			 * filled in by xor node */
946		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
947				  						 * parity write
948				  						 * operation */
949		tmpwriteParityNode->params[2].v = parityStripeID;
950		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
951		    which_ru);
952		pda = pda->next;
953		tmpwriteParityNode = tmpwriteParityNode->list_next;
954		tmpxorNode = tmpxorNode->list_next;
955	}
956
957#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
958	/* initialize nodes which write new Q (Wnq) */
959	if (nfaults == 2) {
960		pda = asmap->qInfo;
961		tmpwriteQNode = writeQNodes;
962		tmpqNode = qNodes;
963		for (i = 0; i < numParityNodes; i++) {
964			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
965				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
966				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
967				    "Wnq", allocList);
968			RF_ASSERT(pda != NULL);
969			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
970								 * filled in by xor node */
971			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
972										 * parity write
973										 * operation */
974			tmpwriteQNode->params[2].v = parityStripeID;
975			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
976			    which_ru);
977			pda = pda->next;
978			tmpwriteQNode = tmpwriteQNode->list_next;
979			tmpqNode = tmpqNode->list_next;
980		}
981	}
982#endif
983	/*
984         * Step 4. connect the nodes.
985         */
986
987	/* connect header to block node */
988	dag_h->succedents[0] = blockNode;
989
990	/* connect block node to read old data nodes */
991	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
992	tmpreadDataNode = readDataNodes;
993	for (i = 0; i < numDataNodes; i++) {
994		blockNode->succedents[i] = tmpreadDataNode;
995		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
996		tmpreadDataNode->antecedents[0] = blockNode;
997		tmpreadDataNode->antType[0] = rf_control;
998		tmpreadDataNode = tmpreadDataNode->list_next;
999	}
1000
1001	/* connect block node to read old parity nodes */
1002	tmpreadParityNode = readParityNodes;
1003	for (i = 0; i < numParityNodes; i++) {
1004		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1005		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1006		tmpreadParityNode->antecedents[0] = blockNode;
1007		tmpreadParityNode->antType[0] = rf_control;
1008		tmpreadParityNode = tmpreadParityNode->list_next;
1009	}
1010
1011#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1012	/* connect block node to read old Q nodes */
1013	if (nfaults == 2) {
1014		tmpreadQNode = readQNodes;
1015		for (i = 0; i < numParityNodes; i++) {
1016			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1017			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1018			tmpreadQNode->antecedents[0] = blockNode;
1019			tmpreadQNode->antType[0] = rf_control;
1020			tmpreadQNode = tmpreadQNode->list_next;
1021		}
1022	}
1023#endif
1024	/* connect read old data nodes to xor nodes */
1025	tmpreadDataNode = readDataNodes;
1026	for (i = 0; i < numDataNodes; i++) {
1027		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1028		tmpxorNode = xorNodes;
1029		for (j = 0; j < numParityNodes; j++) {
1030			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1031			tmpreadDataNode->succedents[j] = tmpxorNode;
1032			tmpxorNode->antecedents[i] = tmpreadDataNode;
1033			tmpxorNode->antType[i] = rf_trueData;
1034			tmpxorNode = tmpxorNode->list_next;
1035		}
1036		tmpreadDataNode = tmpreadDataNode->list_next;
1037	}
1038
1039#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1040	/* connect read old data nodes to q nodes */
1041	if (nfaults == 2) {
1042		tmpreadDataNode = readDataNodes;
1043		for (i = 0; i < numDataNodes; i++) {
1044			tmpqNode = qNodes;
1045			for (j = 0; j < numParityNodes; j++) {
1046				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1047				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1048				tmpqNode->antecedents[i] = tmpreadDataNode;
1049				tmpqNode->antType[i] = rf_trueData;
1050				tmpqNode = tmpqNode->list_next;
1051			}
1052			tmpreadDataNode = tmpreadDataNode->list_next;
1053		}
1054	}
1055#endif
1056	/* connect read old parity nodes to xor nodes */
1057	tmpreadParityNode = readParityNodes;
1058	for (i = 0; i < numParityNodes; i++) {
1059		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1060		tmpxorNode = xorNodes;
1061		for (j = 0; j < numParityNodes; j++) {
1062			tmpreadParityNode->succedents[j] = tmpxorNode;
1063			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1064			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1065			tmpxorNode = tmpxorNode->list_next;
1066		}
1067		tmpreadParityNode = tmpreadParityNode->list_next;
1068	}
1069
1070#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1071	/* connect read old q nodes to q nodes */
1072	if (nfaults == 2) {
1073		tmpreadParityNode = readParityNodes;
1074		tmpreadQNode = readQNodes;
1075		for (i = 0; i < numParityNodes; i++) {
1076			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1077			tmpqNode = qNodes;
1078			for (j = 0; j < numParityNodes; j++) {
1079				tmpreadQNode->succedents[j] = tmpqNode;
1080				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1081				tmpqNode->antType[numDataNodes + i] = rf_trueData;
1082				tmpqNode = tmpqNode->list_next;
1083			}
1084			tmpreadParityNode = tmpreadParityNode->list_next;
1085			tmpreadQNode = tmpreadQNode->list_next;
1086		}
1087	}
1088#endif
1089	/* connect xor nodes to commit node */
1090	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1091	tmpxorNode = xorNodes;
1092	for (i = 0; i < numParityNodes; i++) {
1093		RF_ASSERT(tmpxorNode->numSuccedents == 1);
1094		tmpxorNode->succedents[0] = commitNode;
1095		commitNode->antecedents[i] = tmpxorNode;
1096		commitNode->antType[i] = rf_control;
1097		tmpxorNode = tmpxorNode->list_next;
1098	}
1099
1100#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1101	/* connect q nodes to commit node */
1102	if (nfaults == 2) {
1103		tmpqNode = qNodes;
1104		for (i = 0; i < numParityNodes; i++) {
1105			RF_ASSERT(tmpqNode->numSuccedents == 1);
1106			tmpqNode->succedents[0] = commitNode;
1107			commitNode->antecedents[i + numParityNodes] = tmpqNode;
1108			commitNode->antType[i + numParityNodes] = rf_control;
1109			tmpqNode = tmpqNode->list_next;
1110		}
1111	}
1112#endif
1113	/* connect commit node to write nodes */
1114	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1115	tmpwriteDataNode = writeDataNodes;
1116	for (i = 0; i < numDataNodes; i++) {
1117		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1118		commitNode->succedents[i] = tmpwriteDataNode;
1119		tmpwriteDataNode->antecedents[0] = commitNode;
1120		tmpwriteDataNode->antType[0] = rf_trueData;
1121		tmpwriteDataNode = tmpwriteDataNode->list_next;
1122	}
1123	tmpwriteParityNode = writeParityNodes;
1124	for (i = 0; i < numParityNodes; i++) {
1125		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1126		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1127		tmpwriteParityNode->antecedents[0] = commitNode;
1128		tmpwriteParityNode->antType[0] = rf_trueData;
1129		tmpwriteParityNode = tmpwriteParityNode->list_next;
1130	}
1131#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1132	if (nfaults == 2) {
1133		tmpwriteQNode = writeQNodes;
1134		for (i = 0; i < numParityNodes; i++) {
1135			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1136			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1137			tmpwriteQNode->antecedents[0] = commitNode;
1138			tmpwriteQNode->antType[0] = rf_trueData;
1139			tmpwriteQNode = tmpwriteQNode->list_next;
1140		}
1141	}
1142#endif
1143	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1144	RF_ASSERT(termNode->numSuccedents == 0);
1145	tmpwriteDataNode = writeDataNodes;
1146	for (i = 0; i < numDataNodes; i++) {
1147		/* connect write new data nodes to term node */
1148		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1149		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1150		tmpwriteDataNode->succedents[0] = termNode;
1151		termNode->antecedents[i] = tmpwriteDataNode;
1152		termNode->antType[i] = rf_control;
1153		tmpwriteDataNode = tmpwriteDataNode->list_next;
1154	}
1155
1156	tmpwriteParityNode = writeParityNodes;
1157	for (i = 0; i < numParityNodes; i++) {
1158		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1159		tmpwriteParityNode->succedents[0] = termNode;
1160		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1161		termNode->antType[numDataNodes + i] = rf_control;
1162		tmpwriteParityNode = tmpwriteParityNode->list_next;
1163	}
1164
1165#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1166	if (nfaults == 2) {
1167		tmpwriteQNode = writeQNodes;
1168		for (i = 0; i < numParityNodes; i++) {
1169			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1170			tmpwriteQNode->succedents[0] = termNode;
1171			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1172			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1173			tmpwriteQNode = tmpwriteQNode->list_next;
1174		}
1175	}
1176#endif
1177}
1178
1179
1180/******************************************************************************
1181 * create a write graph (fault-free or degraded) for RAID level 1
1182 *
1183 * Hdr -> Commit -> Wpd -> Nil -> Trm
1184 *               -> Wsd ->
1185 *
1186 * The "Wpd" node writes data to the primary copy in the mirror pair
1187 * The "Wsd" node writes data to the secondary copy in the mirror pair
1188 *
1189 * Parameters:  raidPtr   - description of the physical array
1190 *              asmap     - logical & physical addresses for this access
1191 *              bp        - buffer ptr (holds write data)
1192 *              flags     - general flags (e.g. disk locking)
1193 *              allocList - list of memory allocated in DAG creation
1194 *****************************************************************************/
1195
void
rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			 RF_DagHeader_t *dag_h, void *bp,
			 RF_RaidAccessFlags_t flags,
			 RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *unblockNode, *termNode, *commitNode;
	RF_DagNode_t *wndNode, *wmirNode;	/* heads of the Wpd / Wsd node chains */
	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
	int     nWndNodes, nWmirNodes, i;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda, *pdaP;	/* pda: primary copy, pdaP: mirror copy */
	RF_StripeNum_t parityStripeID;

	/* NOTE(review): bp and flags are never referenced in this body;
	 * they appear to be kept for signature uniformity with the other
	 * DAG-creation routines -- confirm before removing. */
	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);
#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 write DAG]\n");
	}
#endif
	dag_h->creator = "RaidOneWriteDAG";

	/* 2 implies access not SU aligned: a second pda in the chain means
	 * the access spills onto a second stripe unit, needing two writes
	 * per copy */
	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
	nWndNodes = (asmap->physInfo->next) ? 2 : 1;

	/* alloc the Wnd nodes and the Wmir node; a failed copy gets no
	 * write node (degraded-mode write) */
	if (asmap->numDataFailed == 1)
		nWndNodes--;
	if (asmap->numParityFailed == 1)
		nWmirNodes--;

	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
	 * + terminator) */
	/* nodes are prepended onto dag_h->nodes, so after each batch of
	 * allocations the saved head pointer names the first node of that
	 * batch; the rest of the batch is reached via list_next.  The
	 * wiring loops below rely on this exact allocation order. */
	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNode = dag_h->nodes;

	for (i = 0; i < nWmirNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wmirNode = dag_h->nodes;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	unblockNode = rf_AllocDAGNode();
	unblockNode->list_next = dag_h->nodes;
	dag_h->nodes = unblockNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	/* this dag can commit immediately */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the commit, unblock, and term nodes.
	 * Cmt: no antecedents, fans out to every write node.
	 * Nil: collects all write nodes, single successor (Trm).
	 * Trm: single antecedent (Nil), no successors. */
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
		    0, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
		    0, 0, dag_h, "Nil", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
		    dag_h, "Trm", allocList);

	/* initialize the wnd nodes: writes of new data to the primary copy,
	 * one node per pda in the physInfo chain */
	if (nWndNodes > 0) {
		pda = asmap->physInfo;
		tmpwndNode = wndNode;
		for (i = 0; i < nWndNodes; i++) {
			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wpd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwndNode->params[0].p = pda;	/* target disk address */
			tmpwndNode->params[1].p = pda->bufPtr;	/* new data to write */
			tmpwndNode->params[2].v = parityStripeID;
			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			tmpwndNode = tmpwndNode->list_next;
		}
		/* node count must exactly cover the pda chain */
		RF_ASSERT(pda == NULL);
	}
	/* initialize the mirror nodes: write the same data buffers to the
	 * secondary-copy addresses (parityInfo holds the mirror pdas) */
	if (nWmirNodes > 0) {
		pda = asmap->physInfo;
		pdaP = asmap->parityInfo;
		tmpwmirNode = wmirNode;
		for (i = 0; i < nWmirNodes; i++) {
			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wsd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwmirNode->params[0].p = pdaP;	/* target: mirror copy */
			tmpwmirNode->params[1].p = pda->bufPtr;	/* same data as primary write */
			tmpwmirNode->params[2].v = parityStripeID;
			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			pdaP = pdaP->next;
			tmpwmirNode = tmpwmirNode->list_next;
		}
		/* both pda chains must be exactly consumed */
		RF_ASSERT(pda == NULL);
		RF_ASSERT(pdaP == NULL);
	}
	/* link the header node to the commit node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 0);
	dag_h->succedents[0] = commitNode;

	/* link the commit node to the write nodes: Wpd nodes occupy
	 * succedent slots [0, nWndNodes), Wsd nodes follow */
	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numAntecedents == 1);
		commitNode->succedents[i] = tmpwndNode;
		tmpwndNode->antecedents[0] = commitNode;
		tmpwndNode->antType[0] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
		tmpwmirNode->antecedents[0] = commitNode;
		tmpwmirNode->antType[0] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the write nodes to the unblock node, using the same
	 * slot layout as the commit fan-out above */
	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numSuccedents == 1);
		tmpwndNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i] = tmpwndNode;
		unblockNode->antType[i] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
		tmpwmirNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
		unblockNode->antType[i + nWndNodes] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the unblock node to the term node */
	RF_ASSERT(unblockNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	unblockNode->succedents[0] = termNode;
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
}
1365