rf_dagffwr.c revision 1.32
/*	$NetBSD: rf_dagffwr.c,v 1.32 2006/10/12 01:31:50 christos Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * rf_dagff.c
 *
 * code for creating fault-free DAGs
 *
 */
35
36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.32 2006/10/12 01:31:50 christos Exp $");
38
39#include <dev/raidframe/raidframevar.h>
40
41#include "rf_raid.h"
42#include "rf_dag.h"
43#include "rf_dagutils.h"
44#include "rf_dagfuncs.h"
45#include "rf_debugMem.h"
46#include "rf_dagffrd.h"
47#include "rf_general.h"
48#include "rf_dagffwr.h"
49#include "rf_map.h"
50
/******************************************************************************
 *
 * General comments on DAG creation:
 *
 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
 * makes no changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll-forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */


/******************************************************************************
 *
 * The following wrappers map the standard DAG creation interface to the
 * DAG creation routines.  Additionally, these wrappers enable experimentation
 * with new DAG structures by providing an extra level of indirection, allowing
 * the DAG creation routines to be replaced at this single point.
 */
77
78
79void
80rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81			      RF_DagHeader_t *dag_h, void *bp,
82			      RF_RaidAccessFlags_t flags,
83			      RF_AllocListElem_t *allocList,
84			      RF_IoType_t type __unused)
85{
86	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87				 RF_IO_TYPE_WRITE);
88}
89
90void
91rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92		       RF_DagHeader_t *dag_h, void *bp,
93		       RF_RaidAccessFlags_t flags,
94		       RF_AllocListElem_t *allocList,
95		       RF_IoType_t type __unused)
96{
97	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98				 RF_IO_TYPE_WRITE);
99}
100
101void
102rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103		       RF_DagHeader_t *dag_h, void *bp,
104		       RF_RaidAccessFlags_t flags,
105		       RF_AllocListElem_t *allocList)
106{
107	/* "normal" rollaway */
108	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109				     allocList, &rf_xorFuncs, NULL);
110}
111
112void
113rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114		       RF_DagHeader_t *dag_h, void *bp,
115		       RF_RaidAccessFlags_t flags,
116		       RF_AllocListElem_t *allocList)
117{
118	/* "normal" rollaway */
119	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
121}
122
123
/******************************************************************************
 *
 * DAG creation code begins here
 */
128
129
/******************************************************************************
 *
 * creates a DAG to perform a large-write operation:
 *
 *           / Rod \           / Wnd \
 * H -- block- Rod - Xor - Cmt - Wnd --- T
 *           \ Rod /          \  Wnp /
 *                             \[Wnq]/
 *
 * The XOR node also does the Q calculation in the P+Q architecture.
 * All nodes before the commit node (Cmt) are assumed to be atomic and
 * undoable - or - they make no changes to permanent state.
 *
 * Rod = read old data
 * Cmt = commit node
 * Wnp = write new parity
 * Wnd = write new data
 * Wnq = write new "q"
 * [] denotes optional segments in the graph
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *              nfaults   - number of faults the array can tolerate
 *                          (equal to # redundancy units in stripe)
 *              redfuncs  - list of redundancy generating functions
 *
 *****************************************************************************/
160
/*
 * Build the large-write DAG sketched above: read the stripe units not
 * being written (Rod), XOR everything into fresh redundancy, then -
 * after the single commit point - write new data (Wnd), new parity
 * (Wnp) and, for two-fault-tolerant arrays, new Q (Wnq).
 */
void
rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			     RF_DagHeader_t *dag_h, void *bp __unused,
			     RF_RaidAccessFlags_t flags __unused,
			     RF_AllocListElem_t *allocList,
			     int nfaults, int (*redFunc) (RF_DagNode_t *),
			     int allowBufferRecycle)
{
	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
	RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_StripeNum_t parityStripeID;
	char   *sosBuffer, *eosBuffer;
	RF_ReconUnitNum_t which_ru;
	RF_RaidLayout_t *layoutPtr;
	RF_PhysDiskAddr_t *pda;

	layoutPtr = &(raidPtr->Layout);
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
							asmap->raidAddress,
							&which_ru);

#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating large-write DAG]\n");
	}
#endif
	dag_h->creator = "LargeWriteDAG";

	/* roll-away recovery: exactly one commit point in this graph */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
	nWndNodes = asmap->numStripeUnitsAccessed;

	/*
	 * Nodes are pushed onto the head of the dag_h->nodes list, so
	 * after each batch of allocations the saved pointer (e.g.
	 * wndNodes below) names the first node of that batch and the
	 * rest of the batch follows via list_next.
	 */
	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNodes = dag_h->nodes;

	xorNode = rf_AllocDAGNode();
	xorNode->list_next = dag_h->nodes;
	dag_h->nodes = xorNode;

	wnpNode = rf_AllocDAGNode();
	wnpNode->list_next = dag_h->nodes;
	dag_h->nodes = wnpNode;

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		wnqNode = rf_AllocDAGNode();
	} else {
#endif
		/* single-fault arrays have no Q unit to write */
		wnqNode = NULL;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	}
#endif
	/*
	 * Map the portions of the stripe that this access does not
	 * cover; those become the Rod (read old data) sources that feed
	 * the XOR node below.
	 */
	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
					new_asm_h, &nRodNodes, &sosBuffer,
					&eosBuffer, allocList);
	if (nRodNodes > 0) {
		for (i = 0; i < nRodNodes; i++) {
			tmpNode = rf_AllocDAGNode();
			tmpNode->list_next = dag_h->nodes;
			dag_h->nodes = tmpNode;
		}
		rodNodes = dag_h->nodes;
	} else {
		rodNodes = NULL;
	}

	/* begin node initialization */
	if (nRodNodes > 0) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
			    dag_h, "Nil", allocList);
	} else {
		/* no Rods: block node feeds the XOR node directly */
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
			    dag_h, "Nil", allocList);
	}

	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
		    dag_h, "Cmt", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
		    dag_h, "Trm", allocList);

	/* initialize the Rod nodes */
	/* params: {pda, buffer, parity stripe ID, priority/which_ru} */
	tmpNode = rodNodes;
	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
		if (new_asm_h[asmNum]) {
			pda = new_asm_h[asmNum]->stripeMap->physInfo;
			while (pda) {
				rf_InitNode(tmpNode, rf_wait,
					    RF_FALSE, rf_DiskReadFunc,
					    rf_DiskReadUndoFunc,
					    rf_GenericWakeupFunc,
					    1, 1, 4, 0, dag_h,
					    "Rod", allocList);
				tmpNode->params[0].p = pda;
				tmpNode->params[1].p = pda->bufPtr;
				tmpNode->params[2].v = parityStripeID;
				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
				    which_ru);
				nodeNum++;
				pda = pda->next;
				tmpNode = tmpNode->list_next;
			}
		}
	}
	RF_ASSERT(nodeNum == nRodNodes);

	/* initialize the wnd nodes */
	pda = asmap->physInfo;
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Wnd", allocList);
		RF_ASSERT(pda != NULL);
		tmpNode->params[0].p = pda;
		tmpNode->params[1].p = pda->bufPtr;
		tmpNode->params[2].v = parityStripeID;
		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		pda = pda->next;
		tmpNode = tmpNode->list_next;
	}

	/* initialize the redundancy node */
	/*
	 * XOR param layout: {pda, buf} pairs for all Wnd nodes, then
	 * {pda, buf} pairs for all Rod nodes, then raidPtr last.
	 */
	if (nRodNodes > 0) {
		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
			    rf_NullNodeUndoFunc, NULL, 1,
			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
			    nfaults, dag_h, "Xr ", allocList);
	} else {
		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
			    rf_NullNodeUndoFunc, NULL, 1,
			    1, 2 * (nWndNodes + nRodNodes) + 1,
			    nfaults, dag_h, "Xr ", allocList);
	}
	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		/* pda */
		xorNode->params[2 * i + 0] = tmpNode->params[0];
		/* buf ptr */
		xorNode->params[2 * i + 1] = tmpNode->params[1];
		tmpNode = tmpNode->list_next;
	}
	tmpNode = rodNodes;
	for (i = 0; i < nRodNodes; i++) {
		/* pda */
		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
		/* buf ptr */
		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
		tmpNode = tmpNode->list_next;
	}
	/* xor node needs to get at RAID information */
	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;

	/*
         * Look for an Rod node that reads a complete SU. If none,
         * alloc a buffer to receive the parity info. Note that we
         * can't use a new data buffer because it will not have gotten
         * written when the xor occurs.  */
	if (allowBufferRecycle) {
		tmpNode = rodNodes;
		for (i = 0; i < nRodNodes; i++) {
			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
				break;
			tmpNode = tmpNode->list_next;
		}
	}
	/*
	 * Allocate a fresh parity buffer unless the search above found
	 * a full-stripe-unit Rod buffer to recycle.  (When recycling is
	 * disabled, i still equals nRodNodes from the parameter-copy
	 * loop above, but the explicit !allowBufferRecycle test makes
	 * the choice independent of that leftover value.)
	 */
	if ((!allowBufferRecycle) || (i == nRodNodes)) {
		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
	} else {
		/* this works because the only way we get here is if
		   allowBufferRecycle is true and we went through the
		   above for loop, and exited via the break before
		   i==nRodNodes was true.  That means tmpNode will
		   still point to a valid node -- the one we want for
		   here! */
		xorNode->results[0] = tmpNode->params[1].p;
	}

	/* initialize the Wnp node */
	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
		    dag_h, "Wnp", allocList);
	wnpNode->params[0].p = asmap->parityInfo;
	wnpNode->params[1].p = xorNode->results[0];
	wnpNode->params[2].v = parityStripeID;
	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		/*
	         * We never try to recycle a buffer for the Q calculation
	         * in addition to the parity. This would cause two buffers
	         * to get smashed during the P and Q calculation, guaranteeing
	         * one would be wrong.
	         */
		RF_MallocAndAdd(xorNode->results[1],
				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
				(void *), allocList);
		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
			    1, 1, 4, 0, dag_h, "Wnq", allocList);
		wnqNode->params[0].p = asmap->qInfo;
		wnqNode->params[1].p = xorNode->results[1];
		wnqNode->params[2].v = parityStripeID;
		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		/* parityInfo must describe entire parity unit */
		/* NOTE(review): this asserts parityInfo, not qInfo --
		   verify that is intended */
		RF_ASSERT(asmap->parityInfo->next == NULL);
	}
#endif
	/*
         * Connect nodes to form graph.
         */

	/* connect dag header to block node */
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	if (nRodNodes > 0) {
		/* connect the block node to the Rod nodes */
		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
		tmpNode = rodNodes;
		for (i = 0; i < nRodNodes; i++) {
			RF_ASSERT(tmpNode->numAntecedents == 1);
			blockNode->succedents[i] = tmpNode;
			tmpNode->antecedents[0] = blockNode;
			tmpNode->antType[0] = rf_control;

			/* connect the Rod nodes to the Xor node */
			RF_ASSERT(tmpNode->numSuccedents == 1);
			tmpNode->succedents[0] = xorNode;
			xorNode->antecedents[i] = tmpNode;
			xorNode->antType[i] = rf_trueData;
			tmpNode = tmpNode->list_next;
		}
	} else {
		/* connect the block node to the Xor node */
		RF_ASSERT(blockNode->numSuccedents == 1);
		RF_ASSERT(xorNode->numAntecedents == 1);
		blockNode->succedents[0] = xorNode;
		xorNode->antecedents[0] = blockNode;
		xorNode->antType[0] = rf_control;
	}

	/* connect the xor node to the commit node */
	RF_ASSERT(xorNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	xorNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = xorNode;
	commitNode->antType[0] = rf_control;

	/* connect the commit node to the write nodes */
	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(wndNodes->numAntecedents == 1);
		commitNode->succedents[i] = tmpNode;
		tmpNode->antecedents[0] = commitNode;
		tmpNode->antType[0] = rf_control;
		tmpNode = tmpNode->list_next;
	}
	RF_ASSERT(wnpNode->numAntecedents == 1);
	commitNode->succedents[nWndNodes] = wnpNode;
	wnpNode->antecedents[0] = commitNode;
	/* NOTE(review): Wnd nodes above use antType rf_control from Cmt
	   while Wnp/Wnq use rf_trueData here -- confirm this asymmetry
	   is intended */
	wnpNode->antType[0] = rf_trueData;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		RF_ASSERT(wnqNode->numAntecedents == 1);
		commitNode->succedents[nWndNodes + 1] = wnqNode;
		wnqNode->antecedents[0] = commitNode;
		wnqNode->antType[0] = rf_trueData;
	}
#endif
	/* connect the write nodes to the term node */
	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
	RF_ASSERT(termNode->numSuccedents == 0);
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(wndNodes->numSuccedents == 1);
		tmpNode->succedents[0] = termNode;
		termNode->antecedents[i] = tmpNode;
		termNode->antType[i] = rf_control;
		tmpNode = tmpNode->list_next;
	}
	RF_ASSERT(wnpNode->numSuccedents == 1);
	wnpNode->succedents[0] = termNode;
	termNode->antecedents[nWndNodes] = wnpNode;
	termNode->antType[nWndNodes] = rf_control;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		RF_ASSERT(wnqNode->numSuccedents == 1);
		wnqNode->succedents[0] = termNode;
		termNode->antecedents[nWndNodes + 1] = wnqNode;
		termNode->antType[nWndNodes + 1] = rf_control;
	}
#endif
}
/******************************************************************************
 *
 * creates a DAG to perform a small-write operation (either raid 5 or pq),
 * which is as follows:
 *
 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
 *            \- Rod X      /     \----> Wnd [Und]-/
 *           [\- Rod X     /       \---> Wnd [Und]-/]
 *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
 *
 * Rop = read old parity
 * Rod = read old data
 * Roq = read old "q"
 * Cmt = commit node
 * Und = unlock data disk
 * Unp = unlock parity disk
 * Unq = unlock q disk
 * Wnp = write new parity
 * Wnd = write new data
 * Wnq = write new "q"
 * [ ] denotes optional segments in the graph
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *              pfuncs    - list of parity generating functions
 *              qfuncs    - list of q generating functions
 *
 * A null qfuncs indicates single fault tolerant
 *****************************************************************************/
518
519void
520rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
521			     RF_DagHeader_t *dag_h, void *bp __unused,
522			     RF_RaidAccessFlags_t flags __unused,
523			     RF_AllocListElem_t *allocList,
524			     const RF_RedFuncs_t *pfuncs,
525			     const RF_RedFuncs_t *qfuncs)
526{
527	RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
528	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
529	RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
530	RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
531	RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
532	RF_DagNode_t *tmpwriteParityNode;
533#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
534	RF_DagNode_t *tmpwriteQNode;
535#endif
536	int     i, j, nNodes, totalNumNodes;
537	RF_ReconUnitNum_t which_ru;
538	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
539	int     (*qfunc) (RF_DagNode_t *);
540	int     numDataNodes, numParityNodes;
541	RF_StripeNum_t parityStripeID;
542	RF_PhysDiskAddr_t *pda;
543	const char *name, *qname;
544	long    nfaults;
545
546	nfaults = qfuncs ? 2 : 1;
547
548	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
549	    asmap->raidAddress, &which_ru);
550	pda = asmap->physInfo;
551	numDataNodes = asmap->numStripeUnitsAccessed;
552	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
553
554#if RF_DEBUG_DAG
555	if (rf_dagDebug) {
556		printf("[Creating small-write DAG]\n");
557	}
558#endif
559	RF_ASSERT(numDataNodes > 0);
560	dag_h->creator = "SmallWriteDAG";
561
562	dag_h->numCommitNodes = 1;
563	dag_h->numCommits = 0;
564	dag_h->numSuccedents = 1;
565
566	/*
567         * DAG creation occurs in four steps:
568         * 1. count the number of nodes in the DAG
569         * 2. create the nodes
570         * 3. initialize the nodes
571         * 4. connect the nodes
572         */
573
574	/*
575         * Step 1. compute number of nodes in the graph
576         */
577
578	/* number of nodes: a read and write for each data unit a
579	 * redundancy computation node for each parity node (nfaults *
580	 * nparity) a read and write for each parity unit a block and
581	 * commit node (2) a terminate node if atomic RMW an unlock
582	 * node for each data unit, redundancy unit */
583	totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
584	    + (nfaults * 2 * numParityNodes) + 3;
585	/*
586         * Step 2. create the nodes
587         */
588
589	blockNode = rf_AllocDAGNode();
590	blockNode->list_next = dag_h->nodes;
591	dag_h->nodes = blockNode;
592
593	commitNode = rf_AllocDAGNode();
594	commitNode->list_next = dag_h->nodes;
595	dag_h->nodes = commitNode;
596
597	for (i = 0; i < numDataNodes; i++) {
598		tmpNode = rf_AllocDAGNode();
599		tmpNode->list_next = dag_h->nodes;
600		dag_h->nodes = tmpNode;
601	}
602	readDataNodes = dag_h->nodes;
603
604	for (i = 0; i < numParityNodes; i++) {
605		tmpNode = rf_AllocDAGNode();
606		tmpNode->list_next = dag_h->nodes;
607		dag_h->nodes = tmpNode;
608	}
609	readParityNodes = dag_h->nodes;
610
611	for (i = 0; i < numDataNodes; i++) {
612		tmpNode = rf_AllocDAGNode();
613		tmpNode->list_next = dag_h->nodes;
614		dag_h->nodes = tmpNode;
615	}
616	writeDataNodes = dag_h->nodes;
617
618	for (i = 0; i < numParityNodes; i++) {
619		tmpNode = rf_AllocDAGNode();
620		tmpNode->list_next = dag_h->nodes;
621		dag_h->nodes = tmpNode;
622	}
623	writeParityNodes = dag_h->nodes;
624
625	for (i = 0; i < numParityNodes; i++) {
626		tmpNode = rf_AllocDAGNode();
627		tmpNode->list_next = dag_h->nodes;
628		dag_h->nodes = tmpNode;
629	}
630	xorNodes = dag_h->nodes;
631
632	termNode = rf_AllocDAGNode();
633	termNode->list_next = dag_h->nodes;
634	dag_h->nodes = termNode;
635
636#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
637	if (nfaults == 2) {
638		for (i = 0; i < numParityNodes; i++) {
639			tmpNode = rf_AllocDAGNode();
640			tmpNode->list_next = dag_h->nodes;
641			dag_h->nodes = tmpNode;
642		}
643		readQNodes = dag_h->nodes;
644
645		for (i = 0; i < numParityNodes; i++) {
646			tmpNode = rf_AllocDAGNode();
647			tmpNode->list_next = dag_h->nodes;
648			dag_h->nodes = tmpNode;
649		}
650		writeQNodes = dag_h->nodes;
651
652		for (i = 0; i < numParityNodes; i++) {
653			tmpNode = rf_AllocDAGNode();
654			tmpNode->list_next = dag_h->nodes;
655			dag_h->nodes = tmpNode;
656		}
657		qNodes = dag_h->nodes;
658	} else {
659#endif
660		readQNodes = writeQNodes = qNodes = NULL;
661#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
662	}
663#endif
664
665	/*
666         * Step 3. initialize the nodes
667         */
668	/* initialize block node (Nil) */
669	nNodes = numDataNodes + (nfaults * numParityNodes);
670	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
671		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
672		    dag_h, "Nil", allocList);
673
674	/* initialize commit node (Cmt) */
675	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
676		    rf_NullNodeUndoFunc, NULL, nNodes,
677		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
678
679	/* initialize terminate node (Trm) */
680	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
681		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
682		    dag_h, "Trm", allocList);
683
684	/* initialize nodes which read old data (Rod) */
685	tmpreadDataNode = readDataNodes;
686	for (i = 0; i < numDataNodes; i++) {
687		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
688			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
689			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
690			    1, 4, 0, dag_h, "Rod", allocList);
691		RF_ASSERT(pda != NULL);
692		/* physical disk addr desc */
693		tmpreadDataNode->params[0].p = pda;
694		/* buffer to hold old data */
695		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
696		tmpreadDataNode->params[2].v = parityStripeID;
697		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
698		    which_ru);
699		pda = pda->next;
700		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
701			tmpreadDataNode->propList[j] = NULL;
702		}
703		tmpreadDataNode = tmpreadDataNode->list_next;
704	}
705
706	/* initialize nodes which read old parity (Rop) */
707	pda = asmap->parityInfo;
708	i = 0;
709	tmpreadParityNode = readParityNodes;
710	for (i = 0; i < numParityNodes; i++) {
711		RF_ASSERT(pda != NULL);
712		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
713			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
714			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
715			    dag_h, "Rop", allocList);
716		tmpreadParityNode->params[0].p = pda;
717		/* buffer to hold old parity */
718		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
719		tmpreadParityNode->params[2].v = parityStripeID;
720		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
721		    which_ru);
722		pda = pda->next;
723		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
724			tmpreadParityNode->propList[0] = NULL;
725		}
726		tmpreadParityNode = tmpreadParityNode->list_next;
727	}
728
729#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
730	/* initialize nodes which read old Q (Roq) */
731	if (nfaults == 2) {
732		pda = asmap->qInfo;
733		tmpreadQNode = readQNodes;
734		for (i = 0; i < numParityNodes; i++) {
735			RF_ASSERT(pda != NULL);
736			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
737				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
738				    rf_GenericWakeupFunc, numParityNodes,
739				    1, 4, 0, dag_h, "Roq", allocList);
740			tmpreadQNode->params[0].p = pda;
741			/* buffer to hold old Q */
742			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
743								   pda->numSector << raidPtr->logBytesPerSector);
744			tmpreadQNode->params[2].v = parityStripeID;
745			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
746			    which_ru);
747			pda = pda->next;
748			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
749				tmpreadQNode->propList[0] = NULL;
750			}
751			tmpreadQNode = tmpreadQNode->list_next;
752		}
753	}
754#endif
755	/* initialize nodes which write new data (Wnd) */
756	pda = asmap->physInfo;
757	tmpwriteDataNode = writeDataNodes;
758	for (i = 0; i < numDataNodes; i++) {
759		RF_ASSERT(pda != NULL);
760		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
761			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
762			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
763			    "Wnd", allocList);
764		/* physical disk addr desc */
765		tmpwriteDataNode->params[0].p = pda;
766		/* buffer holding new data to be written */
767		tmpwriteDataNode->params[1].p = pda->bufPtr;
768		tmpwriteDataNode->params[2].v = parityStripeID;
769		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
770		    which_ru);
771		pda = pda->next;
772		tmpwriteDataNode = tmpwriteDataNode->list_next;
773	}
774
775	/*
776         * Initialize nodes which compute new parity and Q.
777         */
778	/*
779         * We use the simple XOR func in the double-XOR case, and when
780         * we're accessing only a portion of one stripe unit. The
781         * distinction between the two is that the regular XOR func
782         * assumes that the targbuf is a full SU in size, and examines
783         * the pda associated with the buffer to decide where within
784         * the buffer to XOR the data, whereas the simple XOR func
785         * just XORs the data into the start of the buffer.  */
786	if ((numParityNodes == 2) || ((numDataNodes == 1)
787		&& (asmap->totalSectorsAccessed <
788		    raidPtr->Layout.sectorsPerStripeUnit))) {
789		func = pfuncs->simple;
790		undoFunc = rf_NullNodeUndoFunc;
791		name = pfuncs->SimpleName;
792		if (qfuncs) {
793			qfunc = qfuncs->simple;
794			qname = qfuncs->SimpleName;
795		} else {
796			qfunc = NULL;
797			qname = NULL;
798		}
799	} else {
800		func = pfuncs->regular;
801		undoFunc = rf_NullNodeUndoFunc;
802		name = pfuncs->RegularName;
803		if (qfuncs) {
804			qfunc = qfuncs->regular;
805			qname = qfuncs->RegularName;
806		} else {
807			qfunc = NULL;
808			qname = NULL;
809		}
810	}
811	/*
812         * Initialize the xor nodes: params are {pda,buf}
813         * from {Rod,Wnd,Rop} nodes, and raidPtr
814         */
815	if (numParityNodes == 2) {
816		/* double-xor case */
817		tmpxorNode = xorNodes;
818		tmpreadDataNode = readDataNodes;
819		tmpreadParityNode = readParityNodes;
820		tmpwriteDataNode = writeDataNodes;
821		tmpqNode = qNodes;
822		tmpreadQNode = readQNodes;
823		for (i = 0; i < numParityNodes; i++) {
824			/* note: no wakeup func for xor */
825			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
826				    undoFunc, NULL, 1,
827				    (numDataNodes + numParityNodes),
828				    7, 1, dag_h, name, allocList);
829			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
830			tmpxorNode->params[0] = tmpreadDataNode->params[0];
831			tmpxorNode->params[1] = tmpreadDataNode->params[1];
832			tmpxorNode->params[2] = tmpreadParityNode->params[0];
833			tmpxorNode->params[3] = tmpreadParityNode->params[1];
834			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
835			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
836			tmpxorNode->params[6].p = raidPtr;
837			/* use old parity buf as target buf */
838			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
839#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
840			if (nfaults == 2) {
841				/* note: no wakeup func for qor */
842				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
843					    qfunc, undoFunc, NULL, 1,
844					    (numDataNodes + numParityNodes),
845					    7, 1, dag_h, qname, allocList);
846				tmpqNode->params[0] = tmpreadDataNode->params[0];
847				tmpqNode->params[1] = tmpreadDataNode->params[1];
848				tmpqNode->params[2] = tmpreadQNode->.params[0];
849				tmpqNode->params[3] = tmpreadQNode->params[1];
850				tmpqNode->params[4] = tmpwriteDataNode->params[0];
851				tmpqNode->params[5] = tmpwriteDataNode->params[1];
852				tmpqNode->params[6].p = raidPtr;
853				/* use old Q buf as target buf */
854				tmpqNode->results[0] = tmpreadQNode->params[1].p;
855				tmpqNode = tmpqNode->list_next;
856				tmpreadQNodes = tmpreadQNodes->list_next;
857			}
858#endif
859			tmpxorNode = tmpxorNode->list_next;
860			tmpreadDataNode = tmpreadDataNode->list_next;
861			tmpreadParityNode = tmpreadParityNode->list_next;
862			tmpwriteDataNode = tmpwriteDataNode->list_next;
863		}
864	} else {
865		/* there is only one xor node in this case */
866		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
867			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
868			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
869			    dag_h, name, allocList);
870		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
871		tmpreadDataNode = readDataNodes;
872		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
873							out the "+1" into the "deal with Rop separately below */
874			/* set up params related to Rod nodes */
875			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
876			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
877			tmpreadDataNode = tmpreadDataNode->list_next;
878		}
879		/* deal with Rop separately */
880		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
881		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
882
883		tmpwriteDataNode = writeDataNodes;
884		for (i = 0; i < numDataNodes; i++) {
885			/* set up params related to Wnd and Wnp nodes */
886			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
887			    tmpwriteDataNode->params[0];
888			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
889			    tmpwriteDataNode->params[1];
890			tmpwriteDataNode = tmpwriteDataNode->list_next;
891		}
892		/* xor node needs to get at RAID information */
893		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
894		xorNodes->results[0] = readParityNodes->params[1].p;
895#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
896		if (nfaults == 2) {
897			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
898				    undoFunc, NULL, 1,
899				    (numDataNodes + numParityNodes),
900				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
901				    dag_h, qname, allocList);
902			tmpreadDataNode = readDataNodes;
903			for (i = 0; i < numDataNodes; i++) {
904				/* set up params related to Rod */
905				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
906				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
907				tmpreadDataNode = tmpreadDataNode->list_next;
908			}
909			/* and read old q */
910			qNodes->params[2 * numDataNodes + 0] =	/* pda */
911			    readQNodes->params[0];
912			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
913			    readQNodes->params[1];
914			tmpwriteDataNode = writeDataNodes;
915			for (i = 0; i < numDataNodes; i++) {
916				/* set up params related to Wnd nodes */
917				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
918				    tmpwriteDataNode->params[0];
919				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
920				    tmpwriteDataNode->params[1];
921				tmpwriteDataNode = tmpwriteDataNode->list_next;
922			}
923			/* xor node needs to get at RAID information */
924			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
925			qNodes->results[0] = readQNodes->params[1].p;
926		}
927#endif
928	}
929
930	/* initialize nodes which write new parity (Wnp) */
931	pda = asmap->parityInfo;
932	tmpwriteParityNode = writeParityNodes;
933	tmpxorNode = xorNodes;
934	for (i = 0; i < numParityNodes; i++) {
935		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
936			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
937			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
938			    "Wnp", allocList);
939		RF_ASSERT(pda != NULL);
940		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
941				  			 * filled in by xor node */
942		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
943				  						 * parity write
944				  						 * operation */
945		tmpwriteParityNode->params[2].v = parityStripeID;
946		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
947		    which_ru);
948		pda = pda->next;
949		tmpwriteParityNode = tmpwriteParityNode->list_next;
950		tmpxorNode = tmpxorNode->list_next;
951	}
952
953#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
954	/* initialize nodes which write new Q (Wnq) */
955	if (nfaults == 2) {
956		pda = asmap->qInfo;
957		tmpwriteQNode = writeQNodes;
958		tmpqNode = qNodes;
959		for (i = 0; i < numParityNodes; i++) {
960			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
961				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
962				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
963				    "Wnq", allocList);
964			RF_ASSERT(pda != NULL);
965			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
966								 * filled in by xor node */
967			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
968										 * parity write
969										 * operation */
970			tmpwriteQNode->params[2].v = parityStripeID;
971			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
972			    which_ru);
973			pda = pda->next;
974			tmpwriteQNode = tmpwriteQNode->list_next;
975			tmpqNode = tmpqNode->list_next;
976		}
977	}
978#endif
979	/*
980         * Step 4. connect the nodes.
981         */
982
983	/* connect header to block node */
984	dag_h->succedents[0] = blockNode;
985
986	/* connect block node to read old data nodes */
987	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
988	tmpreadDataNode = readDataNodes;
989	for (i = 0; i < numDataNodes; i++) {
990		blockNode->succedents[i] = tmpreadDataNode;
991		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
992		tmpreadDataNode->antecedents[0] = blockNode;
993		tmpreadDataNode->antType[0] = rf_control;
994		tmpreadDataNode = tmpreadDataNode->list_next;
995	}
996
997	/* connect block node to read old parity nodes */
998	tmpreadParityNode = readParityNodes;
999	for (i = 0; i < numParityNodes; i++) {
1000		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1001		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1002		tmpreadParityNode->antecedents[0] = blockNode;
1003		tmpreadParityNode->antType[0] = rf_control;
1004		tmpreadParityNode = tmpreadParityNode->list_next;
1005	}
1006
1007#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1008	/* connect block node to read old Q nodes */
1009	if (nfaults == 2) {
1010		tmpreadQNode = readQNodes;
1011		for (i = 0; i < numParityNodes; i++) {
1012			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1013			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1014			tmpreadQNode->antecedents[0] = blockNode;
1015			tmpreadQNode->antType[0] = rf_control;
1016			tmpreadQNode = tmpreadQNode->list_next;
1017		}
1018	}
1019#endif
1020	/* connect read old data nodes to xor nodes */
1021	tmpreadDataNode = readDataNodes;
1022	for (i = 0; i < numDataNodes; i++) {
1023		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1024		tmpxorNode = xorNodes;
1025		for (j = 0; j < numParityNodes; j++) {
1026			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1027			tmpreadDataNode->succedents[j] = tmpxorNode;
1028			tmpxorNode->antecedents[i] = tmpreadDataNode;
1029			tmpxorNode->antType[i] = rf_trueData;
1030			tmpxorNode = tmpxorNode->list_next;
1031		}
1032		tmpreadDataNode = tmpreadDataNode->list_next;
1033	}
1034
1035#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1036	/* connect read old data nodes to q nodes */
1037	if (nfaults == 2) {
1038		tmpreadDataNode = readDataNodes;
1039		for (i = 0; i < numDataNodes; i++) {
1040			tmpqNode = qNodes;
1041			for (j = 0; j < numParityNodes; j++) {
1042				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1043				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1044				tmpqNode->antecedents[i] = tmpreadDataNode;
1045				tmpqNode->antType[i] = rf_trueData;
1046				tmpqNode = tmpqNode->list_next;
1047			}
1048			tmpreadDataNode = tmpreadDataNode->list_next;
1049		}
1050	}
1051#endif
1052	/* connect read old parity nodes to xor nodes */
1053	tmpreadParityNode = readParityNodes;
1054	for (i = 0; i < numParityNodes; i++) {
1055		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1056		tmpxorNode = xorNodes;
1057		for (j = 0; j < numParityNodes; j++) {
1058			tmpreadParityNode->succedents[j] = tmpxorNode;
1059			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1060			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1061			tmpxorNode = tmpxorNode->list_next;
1062		}
1063		tmpreadParityNode = tmpreadParityNode->list_next;
1064	}
1065
1066#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1067	/* connect read old q nodes to q nodes */
1068	if (nfaults == 2) {
1069		tmpreadParityNode = readParityNodes;
1070		tmpreadQNode = readQNodes;
1071		for (i = 0; i < numParityNodes; i++) {
1072			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1073			tmpqNode = qNodes;
1074			for (j = 0; j < numParityNodes; j++) {
1075				tmpreadQNode->succedents[j] = tmpqNode;
1076				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1077				tmpqNode->antType[numDataNodes + i] = rf_trueData;
1078				tmpqNode = tmpqNode->list_next;
1079			}
1080			tmpreadParityNode = tmpreadParityNode->list_next;
1081			tmpreadQNode = tmpreadQNode->list_next;
1082		}
1083	}
1084#endif
1085	/* connect xor nodes to commit node */
1086	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1087	tmpxorNode = xorNodes;
1088	for (i = 0; i < numParityNodes; i++) {
1089		RF_ASSERT(tmpxorNode->numSuccedents == 1);
1090		tmpxorNode->succedents[0] = commitNode;
1091		commitNode->antecedents[i] = tmpxorNode;
1092		commitNode->antType[i] = rf_control;
1093		tmpxorNode = tmpxorNode->list_next;
1094	}
1095
1096#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1097	/* connect q nodes to commit node */
1098	if (nfaults == 2) {
1099		tmpqNode = qNodes;
1100		for (i = 0; i < numParityNodes; i++) {
1101			RF_ASSERT(tmpqNode->numSuccedents == 1);
1102			tmpqNode->succedents[0] = commitNode;
1103			commitNode->antecedents[i + numParityNodes] = tmpqNode;
1104			commitNode->antType[i + numParityNodes] = rf_control;
1105			tmpqNode = tmpqNode->list_next;
1106		}
1107	}
1108#endif
1109	/* connect commit node to write nodes */
1110	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1111	tmpwriteDataNode = writeDataNodes;
1112	for (i = 0; i < numDataNodes; i++) {
1113		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1114		commitNode->succedents[i] = tmpwriteDataNode;
1115		tmpwriteDataNode->antecedents[0] = commitNode;
1116		tmpwriteDataNode->antType[0] = rf_trueData;
1117		tmpwriteDataNode = tmpwriteDataNode->list_next;
1118	}
1119	tmpwriteParityNode = writeParityNodes;
1120	for (i = 0; i < numParityNodes; i++) {
1121		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1122		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1123		tmpwriteParityNode->antecedents[0] = commitNode;
1124		tmpwriteParityNode->antType[0] = rf_trueData;
1125		tmpwriteParityNode = tmpwriteParityNode->list_next;
1126	}
1127#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1128	if (nfaults == 2) {
1129		tmpwriteQNode = writeQNodes;
1130		for (i = 0; i < numParityNodes; i++) {
1131			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1132			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1133			tmpwriteQNode->antecedents[0] = commitNode;
1134			tmpwriteQNode->antType[0] = rf_trueData;
1135			tmpwriteQNode = tmpwriteQNode->list_next;
1136		}
1137	}
1138#endif
1139	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1140	RF_ASSERT(termNode->numSuccedents == 0);
1141	tmpwriteDataNode = writeDataNodes;
1142	for (i = 0; i < numDataNodes; i++) {
1143		/* connect write new data nodes to term node */
1144		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1145		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1146		tmpwriteDataNode->succedents[0] = termNode;
1147		termNode->antecedents[i] = tmpwriteDataNode;
1148		termNode->antType[i] = rf_control;
1149		tmpwriteDataNode = tmpwriteDataNode->list_next;
1150	}
1151
1152	tmpwriteParityNode = writeParityNodes;
1153	for (i = 0; i < numParityNodes; i++) {
1154		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1155		tmpwriteParityNode->succedents[0] = termNode;
1156		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1157		termNode->antType[numDataNodes + i] = rf_control;
1158		tmpwriteParityNode = tmpwriteParityNode->list_next;
1159	}
1160
1161#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1162	if (nfaults == 2) {
1163		tmpwriteQNode = writeQNodes;
1164		for (i = 0; i < numParityNodes; i++) {
1165			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1166			tmpwriteQNode->succedents[0] = termNode;
1167			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1168			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1169			tmpwriteQNode = tmpwriteQNode->list_next;
1170		}
1171	}
1172#endif
1173}
1174
1175
1176/******************************************************************************
1177 * create a write graph (fault-free or degraded) for RAID level 1
1178 *
1179 * Hdr -> Commit -> Wpd -> Nil -> Trm
1180 *               -> Wsd ->
1181 *
1182 * The "Wpd" node writes data to the primary copy in the mirror pair
1183 * The "Wsd" node writes data to the secondary copy in the mirror pair
1184 *
1185 * Parameters:  raidPtr   - description of the physical array
1186 *              asmap     - logical & physical addresses for this access
1187 *              bp        - buffer ptr (holds write data)
1188 *              flags     - general flags (e.g. disk locking)
1189 *              allocList - list of memory allocated in DAG creation
1190 *****************************************************************************/
1191
void
rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			 RF_DagHeader_t *dag_h, void *bp __unused,
			 RF_RaidAccessFlags_t flags __unused,
			 RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *unblockNode, *termNode, *commitNode;
	RF_DagNode_t *wndNode, *wmirNode;	/* heads of the Wpd and Wsd node runs */
	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
	int     nWndNodes, nWmirNodes, i;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda, *pdaP;
	RF_StripeNum_t parityStripeID;

	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);
#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 write DAG]\n");
	}
#endif
	dag_h->creator = "RaidOneWriteDAG";

	/* 2 implies access not SU aligned */
	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
	nWndNodes = (asmap->physInfo->next) ? 2 : 1;

	/* alloc the Wnd nodes and the Wmir node */
	/*
	 * Degraded mode: if one copy of the mirror pair is on a failed
	 * disk, drop the corresponding write node and only write the
	 * surviving copy.
	 */
	if (asmap->numDataFailed == 1)
		nWndNodes--;
	if (asmap->numParityFailed == 1)
		nWmirNodes--;

	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
	 * + terminator) */
	/*
	 * rf_AllocDAGNode() nodes are threaded onto dag_h->nodes by
	 * prepending via list_next, so after each allocation loop the list
	 * head (dag_h->nodes) is the first node of the run just allocated.
	 * The loops below rely on walking list_next for exactly that many
	 * nodes.
	 */
	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNode = dag_h->nodes;	/* head of the nWndNodes Wpd nodes */

	for (i = 0; i < nWmirNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wmirNode = dag_h->nodes;	/* head of the nWmirNodes Wsd nodes */

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	unblockNode = rf_AllocDAGNode();
	unblockNode->list_next = dag_h->nodes;
	dag_h->nodes = unblockNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	/* this dag can commit immediately */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the commit, unblock, and term nodes */
	/*
	 * Cmt is the single commit node (RF_TRUE): under the roll-away
	 * recovery model used throughout this file, the disk writes hang
	 * off it and only execute once it fires.
	 */
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
		    0, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
		    0, 0, dag_h, "Nil", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
		    dag_h, "Trm", allocList);

	/* initialize the wnd nodes */
	/* each Wpd node writes one physInfo pda of user data to the
	 * primary copy */
	if (nWndNodes > 0) {
		pda = asmap->physInfo;
		tmpwndNode = wndNode;
		for (i = 0; i < nWndNodes; i++) {
			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wpd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwndNode->params[0].p = pda;	/* disk address */
			tmpwndNode->params[1].p = pda->bufPtr;	/* data buffer */
			tmpwndNode->params[2].v = parityStripeID;
			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			tmpwndNode = tmpwndNode->list_next;
		}
		RF_ASSERT(pda == NULL);	/* consumed every physInfo pda */
	}
	/* initialize the mirror nodes */
	/*
	 * Each Wsd node writes the SAME data buffer (pda->bufPtr from
	 * physInfo) to the secondary copy's disk address (pdaP from
	 * parityInfo); physInfo and parityInfo pda chains are walked in
	 * lockstep.
	 */
	if (nWmirNodes > 0) {
		pda = asmap->physInfo;
		pdaP = asmap->parityInfo;
		tmpwmirNode = wmirNode;
		for (i = 0; i < nWmirNodes; i++) {
			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wsd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwmirNode->params[0].p = pdaP;	/* secondary-copy address */
			tmpwmirNode->params[1].p = pda->bufPtr;	/* same data as Wpd */
			tmpwmirNode->params[2].v = parityStripeID;
			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			pdaP = pdaP->next;
			tmpwmirNode = tmpwmirNode->list_next;
		}
		RF_ASSERT(pda == NULL);
		RF_ASSERT(pdaP == NULL);
	}
	/* link the header node to the commit node */
	/* every edge below is recorded in both directions:
	 * parent->succedents[] and child->antecedents[]/antType[] */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 0);
	dag_h->succedents[0] = commitNode;

	/* link the commit node to the write nodes */
	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numAntecedents == 1);
		commitNode->succedents[i] = tmpwndNode;
		tmpwndNode->antecedents[0] = commitNode;
		tmpwndNode->antType[0] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
		commitNode->succedents[i + nWndNodes] = tmpwmirNode;	/* Wsd slots follow the Wpd slots */
		tmpwmirNode->antecedents[0] = commitNode;
		tmpwmirNode->antType[0] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the write nodes to the unblock node */
	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numSuccedents == 1);
		tmpwndNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i] = tmpwndNode;
		unblockNode->antType[i] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
		tmpwmirNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
		unblockNode->antType[i + nWndNodes] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the unblock node to the term node */
	RF_ASSERT(unblockNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	unblockNode->succedents[0] = termNode;
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
}
1361