1/*	$NetBSD: rf_dagffwr.c,v 1.38 2023/10/15 18:15:20 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/*
 * rf_dagffwr.c
31 *
32 * code for creating fault-free DAGs
33 *
34 */
35
36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.38 2023/10/15 18:15:20 oster Exp $");
38
39#include <dev/raidframe/raidframevar.h>
40
41#include "rf_raid.h"
42#include "rf_dag.h"
43#include "rf_dagutils.h"
44#include "rf_dagfuncs.h"
45#include "rf_debugMem.h"
46#include "rf_dagffrd.h"
47#include "rf_general.h"
48#include "rf_dagffwr.h"
49#include "rf_map.h"
50
51/******************************************************************************
52 *
53 * General comments on DAG creation:
54 *
55 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
56 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
57 * is reached, the execution engine will halt forward execution and work
58 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
60 * does not make changes to permanent state, the graph will fail atomically.
61 * If an error occurs after the Cmt node executes, the engine will roll-forward
62 * through the graph, blindly executing nodes until it reaches the end.
63 * If a graph reaches the end, it is assumed to have completed successfully.
64 *
65 * A graph has only 1 Cmt node.
66 *
67 */
68
69
70/******************************************************************************
71 *
72 * The following wrappers map the standard DAG creation interface to the
73 * DAG creation routines.  Additionally, these wrappers enable experimentation
74 * with new DAG structures by providing an extra level of indirection, allowing
75 * the DAG creation routines to be replaced at this single point.
76 */
77
78
79void
80rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81			      RF_DagHeader_t *dag_h, void *bp,
82			      RF_RaidAccessFlags_t flags,
83			      RF_AllocListElem_t *allocList,
84			      RF_IoType_t type)
85{
86	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87				 RF_IO_TYPE_WRITE);
88}
89
90void
91rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92		       RF_DagHeader_t *dag_h, void *bp,
93		       RF_RaidAccessFlags_t flags,
94		       RF_AllocListElem_t *allocList,
95		       RF_IoType_t type)
96{
97	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98				 RF_IO_TYPE_WRITE);
99}
100
101void
102rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103		       RF_DagHeader_t *dag_h, void *bp,
104		       RF_RaidAccessFlags_t flags,
105		       RF_AllocListElem_t *allocList)
106{
107	/* "normal" rollaway */
108	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109				     allocList, &rf_xorFuncs, NULL);
110}
111
112void
113rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114		       RF_DagHeader_t *dag_h, void *bp,
115		       RF_RaidAccessFlags_t flags,
116		       RF_AllocListElem_t *allocList)
117{
118	/* "normal" rollaway */
119	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
121}
122
123
124/******************************************************************************
125 *
126 * DAG creation code begins here
127 */
/*
 * Allocate a buffer big enough to hold "num" sectors (converted to
 * bytes) and record it via RF_MallocAndAdd on allocList, so it is
 * released together with the DAG's other allocations.  Relies on
 * "raidPtr" and "allocList" being in scope at the expansion site.
 */
#define BUF_ALLOC(num) \
  RF_MallocAndAdd(rf_RaidAddressToByte(raidPtr, num), allocList)
130
131
132/******************************************************************************
133 *
134 * creates a DAG to perform a large-write operation:
135 *
136 *           / Rod \           / Wnd \
137 * H -- block- Rod - Xor - Cmt - Wnd --- T
138 *           \ Rod /          \  Wnp /
139 *                             \[Wnq]/
140 *
141 * The XOR node also does the Q calculation in the P+Q architecture.
 * All nodes before the commit node (Cmt) are assumed to be atomic and
143 * undoable - or - they make no changes to permanent state.
144 *
145 * Rod = read old data
146 * Cmt = commit node
147 * Wnp = write new parity
148 * Wnd = write new data
149 * Wnq = write new "q"
150 * [] denotes optional segments in the graph
151 *
152 * Parameters:  raidPtr   - description of the physical array
153 *              asmap     - logical & physical addresses for this access
154 *              bp        - buffer ptr (holds write data)
155 *              flags     - general flags (e.g. disk locking)
156 *              allocList - list of memory allocated in DAG creation
157 *              nfaults   - number of faults array can tolerate
158 *                          (equal to # redundancy units in stripe)
159 *              redfuncs  - list of redundancy generating functions
160 *
161 *****************************************************************************/
162
163void
164rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
165			     RF_DagHeader_t *dag_h, void *bp,
166			     RF_RaidAccessFlags_t flags,
167			     RF_AllocListElem_t *allocList,
168			     int nfaults, void (*redFunc) (RF_DagNode_t *),
169			     int allowBufferRecycle)
170{
171	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
172	RF_DagNode_t *blockNode, *commitNode, *termNode;
173#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
174	RF_DagNode_t *wnqNode;
175#endif
176	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
177	RF_AccessStripeMapHeader_t *new_asm_h[2];
178	RF_StripeNum_t parityStripeID;
179	char   *sosBuffer, *eosBuffer;
180	RF_ReconUnitNum_t which_ru;
181	RF_RaidLayout_t *layoutPtr;
182	RF_PhysDiskAddr_t *pda;
183
184	layoutPtr = &(raidPtr->Layout);
185	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
186							asmap->raidAddress,
187							&which_ru);
188
189#if RF_DEBUG_DAG
190	if (rf_dagDebug) {
191		printf("[Creating large-write DAG]\n");
192	}
193#endif
194	dag_h->creator = "LargeWriteDAG";
195
196	dag_h->numCommitNodes = 1;
197	dag_h->numCommits = 0;
198	dag_h->numSuccedents = 1;
199
200	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
201	nWndNodes = asmap->numStripeUnitsAccessed;
202
203	for (i = 0; i < nWndNodes; i++) {
204		tmpNode = rf_AllocDAGNode(raidPtr);
205		tmpNode->list_next = dag_h->nodes;
206		dag_h->nodes = tmpNode;
207	}
208	wndNodes = dag_h->nodes;
209
210	xorNode = rf_AllocDAGNode(raidPtr);
211	xorNode->list_next = dag_h->nodes;
212	dag_h->nodes = xorNode;
213
214	wnpNode = rf_AllocDAGNode(raidPtr);
215	wnpNode->list_next = dag_h->nodes;
216	dag_h->nodes = wnpNode;
217
218	blockNode = rf_AllocDAGNode(raidPtr);
219	blockNode->list_next = dag_h->nodes;
220	dag_h->nodes = blockNode;
221
222	commitNode = rf_AllocDAGNode(raidPtr);
223	commitNode->list_next = dag_h->nodes;
224	dag_h->nodes = commitNode;
225
226	termNode = rf_AllocDAGNode(raidPtr);
227	termNode->list_next = dag_h->nodes;
228	dag_h->nodes = termNode;
229
230#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
231	if (nfaults == 2) {
232		wnqNode = rf_AllocDAGNode(raidPtr);
233	} else {
234		wnqNode = NULL;
235	}
236#endif
237	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
238					new_asm_h, &nRodNodes, &sosBuffer,
239					&eosBuffer, allocList);
240	if (nRodNodes > 0) {
241		for (i = 0; i < nRodNodes; i++) {
242			tmpNode = rf_AllocDAGNode(raidPtr);
243			tmpNode->list_next = dag_h->nodes;
244			dag_h->nodes = tmpNode;
245		}
246		rodNodes = dag_h->nodes;
247	} else {
248		rodNodes = NULL;
249	}
250
251	/* begin node initialization */
252	if (nRodNodes > 0) {
253		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
254			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
255			    dag_h, "Nil", allocList);
256	} else {
257		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
258			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
259			    dag_h, "Nil", allocList);
260	}
261
262	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
263		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
264		    dag_h, "Cmt", allocList);
265	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
266		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
267		    dag_h, "Trm", allocList);
268
269	/* initialize the Rod nodes */
270	tmpNode = rodNodes;
271	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
272		if (new_asm_h[asmNum]) {
273			pda = new_asm_h[asmNum]->stripeMap->physInfo;
274			while (pda) {
275				rf_InitNode(tmpNode, rf_wait,
276					    RF_FALSE, rf_DiskReadFunc,
277					    rf_DiskReadUndoFunc,
278					    rf_GenericWakeupFunc,
279					    1, 1, 4, 0, dag_h,
280					    "Rod", allocList);
281				tmpNode->params[0].p = pda;
282				tmpNode->params[1].p = pda->bufPtr;
283				tmpNode->params[2].v = parityStripeID;
284				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
285				    which_ru);
286				nodeNum++;
287				pda = pda->next;
288				tmpNode = tmpNode->list_next;
289			}
290		}
291	}
292	RF_ASSERT(nodeNum == nRodNodes);
293
294	/* initialize the wnd nodes */
295	pda = asmap->physInfo;
296	tmpNode = wndNodes;
297	for (i = 0; i < nWndNodes; i++) {
298		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
299			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
300			    rf_GenericWakeupFunc, 1, 1, 4, 0,
301			    dag_h, "Wnd", allocList);
302		RF_ASSERT(pda != NULL);
303		tmpNode->params[0].p = pda;
304		tmpNode->params[1].p = pda->bufPtr;
305		tmpNode->params[2].v = parityStripeID;
306		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
307		pda = pda->next;
308		tmpNode = tmpNode->list_next;
309	}
310
311	/* initialize the redundancy node */
312	if (nRodNodes > 0) {
313		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
314			    rf_NullNodeUndoFunc, NULL, 1,
315			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
316			    nfaults, dag_h, "Xr ", allocList);
317	} else {
318		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
319			    rf_NullNodeUndoFunc, NULL, 1,
320			    1, 2 * (nWndNodes + nRodNodes) + 1,
321			    nfaults, dag_h, "Xr ", allocList);
322	}
323	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
324	tmpNode = wndNodes;
325	for (i = 0; i < nWndNodes; i++) {
326		/* pda */
327		xorNode->params[2 * i + 0] = tmpNode->params[0];
328		/* buf ptr */
329		xorNode->params[2 * i + 1] = tmpNode->params[1];
330		tmpNode = tmpNode->list_next;
331	}
332	tmpNode = rodNodes;
333	for (i = 0; i < nRodNodes; i++) {
334		/* pda */
335		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
336		/* buf ptr */
337		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
338		tmpNode = tmpNode->list_next;
339	}
340	/* xor node needs to get at RAID information */
341	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
342
343	/*
344         * Look for an Rod node that reads a complete SU. If none,
345         * alloc a buffer to receive the parity info. Note that we
346         * can't use a new data buffer because it will not have gotten
347         * written when the xor occurs.  */
348	if (allowBufferRecycle) {
349		tmpNode = rodNodes;
350		for (i = 0; i < nRodNodes; i++) {
351			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
352				break;
353			tmpNode = tmpNode->list_next;
354		}
355	}
356	if ((!allowBufferRecycle) || (i == nRodNodes)) {
357		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
358	} else {
359		/* this works because the only way we get here is if
360		   allowBufferRecycle is true and we went through the
361		   above for loop, and exited via the break before
362		   i==nRodNodes was true.  That means tmpNode will
363		   still point to a valid node -- the one we want for
364		   here! */
365		xorNode->results[0] = tmpNode->params[1].p;
366	}
367
368	/* initialize the Wnp node */
369	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
370		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
371		    dag_h, "Wnp", allocList);
372	wnpNode->params[0].p = asmap->parityInfo;
373	wnpNode->params[1].p = xorNode->results[0];
374	wnpNode->params[2].v = parityStripeID;
375	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
376	/* parityInfo must describe entire parity unit */
377	RF_ASSERT(asmap->parityInfo->next == NULL);
378
379#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
380	if (nfaults == 2) {
381		/*
382	         * We never try to recycle a buffer for the Q calcuation
383	         * in addition to the parity. This would cause two buffers
384	         * to get smashed during the P and Q calculation, guaranteeing
385	         * one would be wrong.
386	         */
387		xorNode->results[1] =
388		    BUF_ALLOC(raidPtr->Layout.sectorsPerStripeUnit);
389		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
390			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
391			    1, 1, 4, 0, dag_h, "Wnq", allocList);
392		wnqNode->params[0].p = asmap->qInfo;
393		wnqNode->params[1].p = xorNode->results[1];
394		wnqNode->params[2].v = parityStripeID;
395		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
396		/* parityInfo must describe entire parity unit */
397		RF_ASSERT(asmap->parityInfo->next == NULL);
398	}
399#endif
400	/*
401         * Connect nodes to form graph.
402         */
403
404	/* connect dag header to block node */
405	RF_ASSERT(blockNode->numAntecedents == 0);
406	dag_h->succedents[0] = blockNode;
407
408	if (nRodNodes > 0) {
409		/* connect the block node to the Rod nodes */
410		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
411		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
412		tmpNode = rodNodes;
413		for (i = 0; i < nRodNodes; i++) {
414			RF_ASSERT(tmpNode->numAntecedents == 1);
415			blockNode->succedents[i] = tmpNode;
416			tmpNode->antecedents[0] = blockNode;
417			tmpNode->antType[0] = rf_control;
418
419			/* connect the Rod nodes to the Xor node */
420			RF_ASSERT(tmpNode->numSuccedents == 1);
421			tmpNode->succedents[0] = xorNode;
422			xorNode->antecedents[i] = tmpNode;
423			xorNode->antType[i] = rf_trueData;
424			tmpNode = tmpNode->list_next;
425		}
426	} else {
427		/* connect the block node to the Xor node */
428		RF_ASSERT(blockNode->numSuccedents == 1);
429		RF_ASSERT(xorNode->numAntecedents == 1);
430		blockNode->succedents[0] = xorNode;
431		xorNode->antecedents[0] = blockNode;
432		xorNode->antType[0] = rf_control;
433	}
434
435	/* connect the xor node to the commit node */
436	RF_ASSERT(xorNode->numSuccedents == 1);
437	RF_ASSERT(commitNode->numAntecedents == 1);
438	xorNode->succedents[0] = commitNode;
439	commitNode->antecedents[0] = xorNode;
440	commitNode->antType[0] = rf_control;
441
442	/* connect the commit node to the write nodes */
443	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
444	tmpNode = wndNodes;
445	for (i = 0; i < nWndNodes; i++) {
446		RF_ASSERT(wndNodes->numAntecedents == 1);
447		commitNode->succedents[i] = tmpNode;
448		tmpNode->antecedents[0] = commitNode;
449		tmpNode->antType[0] = rf_control;
450		tmpNode = tmpNode->list_next;
451	}
452	RF_ASSERT(wnpNode->numAntecedents == 1);
453	commitNode->succedents[nWndNodes] = wnpNode;
454	wnpNode->antecedents[0] = commitNode;
455	wnpNode->antType[0] = rf_trueData;
456#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
457	if (nfaults == 2) {
458		RF_ASSERT(wnqNode->numAntecedents == 1);
459		commitNode->succedents[nWndNodes + 1] = wnqNode;
460		wnqNode->antecedents[0] = commitNode;
461		wnqNode->antType[0] = rf_trueData;
462	}
463#endif
464	/* connect the write nodes to the term node */
465	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
466	RF_ASSERT(termNode->numSuccedents == 0);
467	tmpNode = wndNodes;
468	for (i = 0; i < nWndNodes; i++) {
469		RF_ASSERT(wndNodes->numSuccedents == 1);
470		tmpNode->succedents[0] = termNode;
471		termNode->antecedents[i] = tmpNode;
472		termNode->antType[i] = rf_control;
473		tmpNode = tmpNode->list_next;
474	}
475	RF_ASSERT(wnpNode->numSuccedents == 1);
476	wnpNode->succedents[0] = termNode;
477	termNode->antecedents[nWndNodes] = wnpNode;
478	termNode->antType[nWndNodes] = rf_control;
479#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
480	if (nfaults == 2) {
481		RF_ASSERT(wnqNode->numSuccedents == 1);
482		wnqNode->succedents[0] = termNode;
483		termNode->antecedents[nWndNodes + 1] = wnqNode;
484		termNode->antType[nWndNodes + 1] = rf_control;
485	}
486#endif
487}
488/******************************************************************************
489 *
490 * creates a DAG to perform a small-write operation (either raid 5 or pq),
491 * which is as follows:
492 *
493 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
494 *            \- Rod X      /     \----> Wnd [Und]-/
495 *           [\- Rod X     /       \---> Wnd [Und]-/]
496 *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
497 *
498 * Rop = read old parity
499 * Rod = read old data
500 * Roq = read old "q"
501 * Cmt = commit node
502 * Und = unlock data disk
503 * Unp = unlock parity disk
504 * Unq = unlock q disk
505 * Wnp = write new parity
506 * Wnd = write new data
507 * Wnq = write new "q"
508 * [ ] denotes optional segments in the graph
509 *
510 * Parameters:  raidPtr   - description of the physical array
511 *              asmap     - logical & physical addresses for this access
512 *              bp        - buffer ptr (holds write data)
513 *              flags     - general flags (e.g. disk locking)
514 *              allocList - list of memory allocated in DAG creation
515 *              pfuncs    - list of parity generating functions
516 *              qfuncs    - list of q generating functions
517 *
518 * A null qfuncs indicates single fault tolerant
519 *****************************************************************************/
520
521void
522rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
523			     RF_DagHeader_t *dag_h, void *bp,
524			     RF_RaidAccessFlags_t flags,
525			     RF_AllocListElem_t *allocList,
526			     const RF_RedFuncs_t *pfuncs,
527			     const RF_RedFuncs_t *qfuncs)
528{
529	RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
530	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
531	RF_DagNode_t *xorNodes, *blockNode, *commitNode;
532	RF_DagNode_t *writeDataNodes, *writeParityNodes;
533	RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
534	RF_DagNode_t *tmpwriteParityNode;
535#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
536	RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
537	     *writeQNodes, *qNodes;
538#endif
539	int     i, j, nNodes;
540	RF_ReconUnitNum_t which_ru;
541	void    (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
542	void    (*qfunc) (RF_DagNode_t *) __unused;
543	int     numDataNodes, numParityNodes;
544	RF_StripeNum_t parityStripeID;
545	RF_PhysDiskAddr_t *pda;
546	const char *name, *qname __unused;
547	long    nfaults;
548
549	nfaults = qfuncs ? 2 : 1;
550
551	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
552	    asmap->raidAddress, &which_ru);
553	pda = asmap->physInfo;
554	numDataNodes = asmap->numStripeUnitsAccessed;
555	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
556
557#if RF_DEBUG_DAG
558	if (rf_dagDebug) {
559		printf("[Creating small-write DAG]\n");
560	}
561#endif
562	RF_ASSERT(numDataNodes > 0);
563	dag_h->creator = "SmallWriteDAG";
564
565	dag_h->numCommitNodes = 1;
566	dag_h->numCommits = 0;
567	dag_h->numSuccedents = 1;
568
569	/*
570         * DAG creation occurs in four steps:
571         * 1. count the number of nodes in the DAG
572         * 2. create the nodes
573         * 3. initialize the nodes
574         * 4. connect the nodes
575         */
576
577	/*
578         * Step 1. compute number of nodes in the graph
579         */
580
581	/* number of nodes: a read and write for each data unit a
582	 * redundancy computation node for each parity node (nfaults *
583	 * nparity) a read and write for each parity unit a block and
584	 * commit node (2) a terminate node if atomic RMW an unlock
585	 * node for each data unit, redundancy unit
586	 * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
587	 *   + (nfaults * 2 * numParityNodes) + 3;
588	 */
589
590	/*
591         * Step 2. create the nodes
592         */
593
594	blockNode = rf_AllocDAGNode(raidPtr);
595	blockNode->list_next = dag_h->nodes;
596	dag_h->nodes = blockNode;
597
598	commitNode = rf_AllocDAGNode(raidPtr);
599	commitNode->list_next = dag_h->nodes;
600	dag_h->nodes = commitNode;
601
602	for (i = 0; i < numDataNodes; i++) {
603		tmpNode = rf_AllocDAGNode(raidPtr);
604		tmpNode->list_next = dag_h->nodes;
605		dag_h->nodes = tmpNode;
606	}
607	readDataNodes = dag_h->nodes;
608
609	for (i = 0; i < numParityNodes; i++) {
610		tmpNode = rf_AllocDAGNode(raidPtr);
611		tmpNode->list_next = dag_h->nodes;
612		dag_h->nodes = tmpNode;
613	}
614	readParityNodes = dag_h->nodes;
615
616	for (i = 0; i < numDataNodes; i++) {
617		tmpNode = rf_AllocDAGNode(raidPtr);
618		tmpNode->list_next = dag_h->nodes;
619		dag_h->nodes = tmpNode;
620	}
621	writeDataNodes = dag_h->nodes;
622
623	for (i = 0; i < numParityNodes; i++) {
624		tmpNode = rf_AllocDAGNode(raidPtr);
625		tmpNode->list_next = dag_h->nodes;
626		dag_h->nodes = tmpNode;
627	}
628	writeParityNodes = dag_h->nodes;
629
630	for (i = 0; i < numParityNodes; i++) {
631		tmpNode = rf_AllocDAGNode(raidPtr);
632		tmpNode->list_next = dag_h->nodes;
633		dag_h->nodes = tmpNode;
634	}
635	xorNodes = dag_h->nodes;
636
637	termNode = rf_AllocDAGNode(raidPtr);
638	termNode->list_next = dag_h->nodes;
639	dag_h->nodes = termNode;
640
641#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
642	if (nfaults == 2) {
643		for (i = 0; i < numParityNodes; i++) {
644			tmpNode = rf_AllocDAGNode(raidPtr);
645			tmpNode->list_next = dag_h->nodes;
646			dag_h->nodes = tmpNode;
647		}
648		readQNodes = dag_h->nodes;
649
650		for (i = 0; i < numParityNodes; i++) {
651			tmpNode = rf_AllocDAGNode(raidPtr);
652			tmpNode->list_next = dag_h->nodes;
653			dag_h->nodes = tmpNode;
654		}
655		writeQNodes = dag_h->nodes;
656
657		for (i = 0; i < numParityNodes; i++) {
658			tmpNode = rf_AllocDAGNode(raidPtr);
659			tmpNode->list_next = dag_h->nodes;
660			dag_h->nodes = tmpNode;
661		}
662		qNodes = dag_h->nodes;
663	} else {
664		readQNodes = writeQNodes = qNodes = NULL;
665	}
666#endif
667
668	/*
669         * Step 3. initialize the nodes
670         */
671	/* initialize block node (Nil) */
672	nNodes = numDataNodes + (nfaults * numParityNodes);
673	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
674		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
675		    dag_h, "Nil", allocList);
676
677	/* initialize commit node (Cmt) */
678	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
679		    rf_NullNodeUndoFunc, NULL, nNodes,
680		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
681
682	/* initialize terminate node (Trm) */
683	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
684		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
685		    dag_h, "Trm", allocList);
686
687	/* initialize nodes which read old data (Rod) */
688	tmpreadDataNode = readDataNodes;
689	for (i = 0; i < numDataNodes; i++) {
690		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
691			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
692			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
693			    1, 4, 0, dag_h, "Rod", allocList);
694		RF_ASSERT(pda != NULL);
695		/* physical disk addr desc */
696		tmpreadDataNode->params[0].p = pda;
697		/* buffer to hold old data */
698		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
699		tmpreadDataNode->params[2].v = parityStripeID;
700		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
701		    which_ru);
702		pda = pda->next;
703		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
704			tmpreadDataNode->propList[j] = NULL;
705		}
706		tmpreadDataNode = tmpreadDataNode->list_next;
707	}
708
709	/* initialize nodes which read old parity (Rop) */
710	pda = asmap->parityInfo;
711	i = 0;
712	tmpreadParityNode = readParityNodes;
713	for (i = 0; i < numParityNodes; i++) {
714		RF_ASSERT(pda != NULL);
715		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
716			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
717			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
718			    dag_h, "Rop", allocList);
719		tmpreadParityNode->params[0].p = pda;
720		/* buffer to hold old parity */
721		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
722		tmpreadParityNode->params[2].v = parityStripeID;
723		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
724		    which_ru);
725		pda = pda->next;
726		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
727			tmpreadParityNode->propList[0] = NULL;
728		}
729		tmpreadParityNode = tmpreadParityNode->list_next;
730	}
731
732#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
733	/* initialize nodes which read old Q (Roq) */
734	if (nfaults == 2) {
735		pda = asmap->qInfo;
736		tmpreadQNode = readQNodes;
737		for (i = 0; i < numParityNodes; i++) {
738			RF_ASSERT(pda != NULL);
739			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
740				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
741				    rf_GenericWakeupFunc, numParityNodes,
742				    1, 4, 0, dag_h, "Roq", allocList);
743			tmpreadQNode->params[0].p = pda;
744			/* buffer to hold old Q */
745			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
746								   pda->numSector << raidPtr->logBytesPerSector);
747			tmpreadQNode->params[2].v = parityStripeID;
748			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
749			    which_ru);
750			pda = pda->next;
751			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
752				tmpreadQNode->propList[0] = NULL;
753			}
754			tmpreadQNode = tmpreadQNode->list_next;
755		}
756	}
757#endif
758	/* initialize nodes which write new data (Wnd) */
759	pda = asmap->physInfo;
760	tmpwriteDataNode = writeDataNodes;
761	for (i = 0; i < numDataNodes; i++) {
762		RF_ASSERT(pda != NULL);
763		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
764			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
765			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
766			    "Wnd", allocList);
767		/* physical disk addr desc */
768		tmpwriteDataNode->params[0].p = pda;
769		/* buffer holding new data to be written */
770		tmpwriteDataNode->params[1].p = pda->bufPtr;
771		tmpwriteDataNode->params[2].v = parityStripeID;
772		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
773		    which_ru);
774		pda = pda->next;
775		tmpwriteDataNode = tmpwriteDataNode->list_next;
776	}
777
778	/*
779         * Initialize nodes which compute new parity and Q.
780         */
781	/*
782         * We use the simple XOR func in the double-XOR case, and when
783         * we're accessing only a portion of one stripe unit. The
784         * distinction between the two is that the regular XOR func
785         * assumes that the targbuf is a full SU in size, and examines
786         * the pda associated with the buffer to decide where within
787         * the buffer to XOR the data, whereas the simple XOR func
788         * just XORs the data into the start of the buffer.  */
789	if ((numParityNodes == 2) || ((numDataNodes == 1)
790		&& (asmap->totalSectorsAccessed <
791		    raidPtr->Layout.sectorsPerStripeUnit))) {
792		func = pfuncs->simple;
793		undoFunc = rf_NullNodeUndoFunc;
794		name = pfuncs->SimpleName;
795		if (qfuncs) {
796			qfunc = qfuncs->simple;
797			qname = qfuncs->SimpleName;
798		} else {
799			qfunc = NULL;
800			qname = NULL;
801		}
802	} else {
803		func = pfuncs->regular;
804		undoFunc = rf_NullNodeUndoFunc;
805		name = pfuncs->RegularName;
806		if (qfuncs) {
807			qfunc = qfuncs->regular;
808			qname = qfuncs->RegularName;
809		} else {
810			qfunc = NULL;
811			qname = NULL;
812		}
813	}
814	/*
815         * Initialize the xor nodes: params are {pda,buf}
816         * from {Rod,Wnd,Rop} nodes, and raidPtr
817         */
818	if (numParityNodes == 2) {
819		/* double-xor case */
820		tmpxorNode = xorNodes;
821		tmpreadDataNode = readDataNodes;
822		tmpreadParityNode = readParityNodes;
823		tmpwriteDataNode = writeDataNodes;
824#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
825		tmpqNode = qNodes;
826		tmpreadQNode = readQNodes;
827#endif
828		for (i = 0; i < numParityNodes; i++) {
829			/* note: no wakeup func for xor */
830			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
831				    undoFunc, NULL, 1,
832				    (numDataNodes + numParityNodes),
833				    7, 1, dag_h, name, allocList);
834			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
835			tmpxorNode->params[0] = tmpreadDataNode->params[0];
836			tmpxorNode->params[1] = tmpreadDataNode->params[1];
837			tmpxorNode->params[2] = tmpreadParityNode->params[0];
838			tmpxorNode->params[3] = tmpreadParityNode->params[1];
839			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
840			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
841			tmpxorNode->params[6].p = raidPtr;
842			/* use old parity buf as target buf */
843			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
844#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
845			if (nfaults == 2) {
846				/* note: no wakeup func for qor */
847				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
848					    qfunc, undoFunc, NULL, 1,
849					    (numDataNodes + numParityNodes),
850					    7, 1, dag_h, qname, allocList);
851				tmpqNode->params[0] = tmpreadDataNode->params[0];
852				tmpqNode->params[1] = tmpreadDataNode->params[1];
853				tmpqNode->params[2] = tmpreadQNode->params[0];
854				tmpqNode->params[3] = tmpreadQNode->params[1];
855				tmpqNode->params[4] = tmpwriteDataNode->params[0];
856				tmpqNode->params[5] = tmpwriteDataNode->params[1];
857				tmpqNode->params[6].p = raidPtr;
858				/* use old Q buf as target buf */
859				tmpqNode->results[0] = tmpreadQNode->params[1].p;
860				tmpqNode = tmpqNode->list_next;
861				tmpreadQNode = tmpreadQNode->list_next;
862			}
863#endif
864			tmpxorNode = tmpxorNode->list_next;
865			tmpreadDataNode = tmpreadDataNode->list_next;
866			tmpreadParityNode = tmpreadParityNode->list_next;
867			tmpwriteDataNode = tmpwriteDataNode->list_next;
868		}
869	} else {
870		/* there is only one xor node in this case */
871		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
872			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
873			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
874			    dag_h, name, allocList);
875		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
876		tmpreadDataNode = readDataNodes;
		for (i = 0; i < numDataNodes; i++) { /* used to be "numDataNodes + 1" until we factored
							out the "+1" into the "deal with Rop separately" step below */
879			/* set up params related to Rod nodes */
880			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
881			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
882			tmpreadDataNode = tmpreadDataNode->list_next;
883		}
884		/* deal with Rop separately */
885		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
886		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
887
888		tmpwriteDataNode = writeDataNodes;
889		for (i = 0; i < numDataNodes; i++) {
890			/* set up params related to Wnd and Wnp nodes */
891			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
892			    tmpwriteDataNode->params[0];
893			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
894			    tmpwriteDataNode->params[1];
895			tmpwriteDataNode = tmpwriteDataNode->list_next;
896		}
897		/* xor node needs to get at RAID information */
898		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
899		xorNodes->results[0] = readParityNodes->params[1].p;
900#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
901		if (nfaults == 2) {
902			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
903				    undoFunc, NULL, 1,
904				    (numDataNodes + numParityNodes),
905				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
906				    dag_h, qname, allocList);
907			tmpreadDataNode = readDataNodes;
908			for (i = 0; i < numDataNodes; i++) {
909				/* set up params related to Rod */
910				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
911				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
912				tmpreadDataNode = tmpreadDataNode->list_next;
913			}
914			/* and read old q */
915			qNodes->params[2 * numDataNodes + 0] =	/* pda */
916			    readQNodes->params[0];
917			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
918			    readQNodes->params[1];
919			tmpwriteDataNode = writeDataNodes;
920			for (i = 0; i < numDataNodes; i++) {
921				/* set up params related to Wnd nodes */
922				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
923				    tmpwriteDataNode->params[0];
924				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
925				    tmpwriteDataNode->params[1];
926				tmpwriteDataNode = tmpwriteDataNode->list_next;
927			}
			/* q node needs to get at RAID information */
929			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
930			qNodes->results[0] = readQNodes->params[1].p;
931		}
932#endif
933	}
934
935	/* initialize nodes which write new parity (Wnp) */
936	pda = asmap->parityInfo;
937	tmpwriteParityNode = writeParityNodes;
938	tmpxorNode = xorNodes;
939	for (i = 0; i < numParityNodes; i++) {
940		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
941			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
942			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
943			    "Wnp", allocList);
944		RF_ASSERT(pda != NULL);
945		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
946				  			 * filled in by xor node */
947		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
948				  						 * parity write
949				  						 * operation */
950		tmpwriteParityNode->params[2].v = parityStripeID;
951		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
952		    which_ru);
953		pda = pda->next;
954		tmpwriteParityNode = tmpwriteParityNode->list_next;
955		tmpxorNode = tmpxorNode->list_next;
956	}
957
958#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
959	/* initialize nodes which write new Q (Wnq) */
960	if (nfaults == 2) {
961		pda = asmap->qInfo;
962		tmpwriteQNode = writeQNodes;
963		tmpqNode = qNodes;
964		for (i = 0; i < numParityNodes; i++) {
965			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
966				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
967				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
968				    "Wnq", allocList);
969			RF_ASSERT(pda != NULL);
			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
								 * filled in by q node */
			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
										 * Q write
										 * operation */
975			tmpwriteQNode->params[2].v = parityStripeID;
976			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
977			    which_ru);
978			pda = pda->next;
979			tmpwriteQNode = tmpwriteQNode->list_next;
980			tmpqNode = tmpqNode->list_next;
981		}
982	}
983#endif
984	/*
985         * Step 4. connect the nodes.
986         */
987
988	/* connect header to block node */
989	dag_h->succedents[0] = blockNode;
990
991	/* connect block node to read old data nodes */
992	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
993	tmpreadDataNode = readDataNodes;
994	for (i = 0; i < numDataNodes; i++) {
995		blockNode->succedents[i] = tmpreadDataNode;
996		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
997		tmpreadDataNode->antecedents[0] = blockNode;
998		tmpreadDataNode->antType[0] = rf_control;
999		tmpreadDataNode = tmpreadDataNode->list_next;
1000	}
1001
1002	/* connect block node to read old parity nodes */
1003	tmpreadParityNode = readParityNodes;
1004	for (i = 0; i < numParityNodes; i++) {
1005		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1006		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1007		tmpreadParityNode->antecedents[0] = blockNode;
1008		tmpreadParityNode->antType[0] = rf_control;
1009		tmpreadParityNode = tmpreadParityNode->list_next;
1010	}
1011
1012#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1013	/* connect block node to read old Q nodes */
1014	if (nfaults == 2) {
1015		tmpreadQNode = readQNodes;
1016		for (i = 0; i < numParityNodes; i++) {
1017			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1018			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1019			tmpreadQNode->antecedents[0] = blockNode;
1020			tmpreadQNode->antType[0] = rf_control;
1021			tmpreadQNode = tmpreadQNode->list_next;
1022		}
1023	}
1024#endif
1025	/* connect read old data nodes to xor nodes */
1026	tmpreadDataNode = readDataNodes;
1027	for (i = 0; i < numDataNodes; i++) {
1028		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1029		tmpxorNode = xorNodes;
1030		for (j = 0; j < numParityNodes; j++) {
1031			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1032			tmpreadDataNode->succedents[j] = tmpxorNode;
1033			tmpxorNode->antecedents[i] = tmpreadDataNode;
1034			tmpxorNode->antType[i] = rf_trueData;
1035			tmpxorNode = tmpxorNode->list_next;
1036		}
1037		tmpreadDataNode = tmpreadDataNode->list_next;
1038	}
1039
1040#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1041	/* connect read old data nodes to q nodes */
1042	if (nfaults == 2) {
1043		tmpreadDataNode = readDataNodes;
1044		for (i = 0; i < numDataNodes; i++) {
1045			tmpqNode = qNodes;
1046			for (j = 0; j < numParityNodes; j++) {
1047				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1048				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1049				tmpqNode->antecedents[i] = tmpreadDataNode;
1050				tmpqNode->antType[i] = rf_trueData;
1051				tmpqNode = tmpqNode->list_next;
1052			}
1053			tmpreadDataNode = tmpreadDataNode->list_next;
1054		}
1055	}
1056#endif
1057	/* connect read old parity nodes to xor nodes */
1058	tmpreadParityNode = readParityNodes;
1059	for (i = 0; i < numParityNodes; i++) {
1060		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1061		tmpxorNode = xorNodes;
1062		for (j = 0; j < numParityNodes; j++) {
1063			tmpreadParityNode->succedents[j] = tmpxorNode;
1064			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1065			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1066			tmpxorNode = tmpxorNode->list_next;
1067		}
1068		tmpreadParityNode = tmpreadParityNode->list_next;
1069	}
1070
1071#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1072	/* connect read old q nodes to q nodes */
1073	if (nfaults == 2) {
1074		tmpreadParityNode = readParityNodes;
1075		tmpreadQNode = readQNodes;
1076		for (i = 0; i < numParityNodes; i++) {
1077			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1078			tmpqNode = qNodes;
1079			for (j = 0; j < numParityNodes; j++) {
1080				tmpreadQNode->succedents[j] = tmpqNode;
1081				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNode;
1082				tmpqNode->antType[numDataNodes + i] = rf_trueData;
1083				tmpqNode = tmpqNode->list_next;
1084			}
1085			tmpreadParityNode = tmpreadParityNode->list_next;
1086			tmpreadQNode = tmpreadQNode->list_next;
1087		}
1088	}
1089#endif
1090	/* connect xor nodes to commit node */
1091	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1092	tmpxorNode = xorNodes;
1093	for (i = 0; i < numParityNodes; i++) {
1094		RF_ASSERT(tmpxorNode->numSuccedents == 1);
1095		tmpxorNode->succedents[0] = commitNode;
1096		commitNode->antecedents[i] = tmpxorNode;
1097		commitNode->antType[i] = rf_control;
1098		tmpxorNode = tmpxorNode->list_next;
1099	}
1100
1101#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1102	/* connect q nodes to commit node */
1103	if (nfaults == 2) {
1104		tmpqNode = qNodes;
1105		for (i = 0; i < numParityNodes; i++) {
1106			RF_ASSERT(tmpqNode->numSuccedents == 1);
1107			tmpqNode->succedents[0] = commitNode;
1108			commitNode->antecedents[i + numParityNodes] = tmpqNode;
1109			commitNode->antType[i + numParityNodes] = rf_control;
1110			tmpqNode = tmpqNode->list_next;
1111		}
1112	}
1113#endif
1114	/* connect commit node to write nodes */
1115	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1116	tmpwriteDataNode = writeDataNodes;
1117	for (i = 0; i < numDataNodes; i++) {
1118		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1119		commitNode->succedents[i] = tmpwriteDataNode;
1120		tmpwriteDataNode->antecedents[0] = commitNode;
1121		tmpwriteDataNode->antType[0] = rf_trueData;
1122		tmpwriteDataNode = tmpwriteDataNode->list_next;
1123	}
1124	tmpwriteParityNode = writeParityNodes;
1125	for (i = 0; i < numParityNodes; i++) {
1126		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1127		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1128		tmpwriteParityNode->antecedents[0] = commitNode;
1129		tmpwriteParityNode->antType[0] = rf_trueData;
1130		tmpwriteParityNode = tmpwriteParityNode->list_next;
1131	}
1132#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1133	if (nfaults == 2) {
1134		tmpwriteQNode = writeQNodes;
1135		for (i = 0; i < numParityNodes; i++) {
1136			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1137			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1138			tmpwriteQNode->antecedents[0] = commitNode;
1139			tmpwriteQNode->antType[0] = rf_trueData;
1140			tmpwriteQNode = tmpwriteQNode->list_next;
1141		}
1142	}
1143#endif
1144	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1145	RF_ASSERT(termNode->numSuccedents == 0);
1146	tmpwriteDataNode = writeDataNodes;
1147	for (i = 0; i < numDataNodes; i++) {
1148		/* connect write new data nodes to term node */
1149		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1150		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1151		tmpwriteDataNode->succedents[0] = termNode;
1152		termNode->antecedents[i] = tmpwriteDataNode;
1153		termNode->antType[i] = rf_control;
1154		tmpwriteDataNode = tmpwriteDataNode->list_next;
1155	}
1156
1157	tmpwriteParityNode = writeParityNodes;
1158	for (i = 0; i < numParityNodes; i++) {
1159		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1160		tmpwriteParityNode->succedents[0] = termNode;
1161		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1162		termNode->antType[numDataNodes + i] = rf_control;
1163		tmpwriteParityNode = tmpwriteParityNode->list_next;
1164	}
1165
1166#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1167	if (nfaults == 2) {
1168		tmpwriteQNode = writeQNodes;
1169		for (i = 0; i < numParityNodes; i++) {
1170			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1171			tmpwriteQNode->succedents[0] = termNode;
1172			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1173			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1174			tmpwriteQNode = tmpwriteQNode->list_next;
1175		}
1176	}
1177#endif
1178}
1179
1180
1181/******************************************************************************
1182 * create a write graph (fault-free or degraded) for RAID level 1
1183 *
1184 * Hdr -> Commit -> Wpd -> Nil -> Trm
1185 *               -> Wsd ->
1186 *
1187 * The "Wpd" node writes data to the primary copy in the mirror pair
1188 * The "Wsd" node writes data to the secondary copy in the mirror pair
1189 *
1190 * Parameters:  raidPtr   - description of the physical array
1191 *              asmap     - logical & physical addresses for this access
1192 *              bp        - buffer ptr (holds write data)
1193 *              flags     - general flags (e.g. disk locking)
1194 *              allocList - list of memory allocated in DAG creation
1195 *****************************************************************************/
1196
void
rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			 RF_DagHeader_t *dag_h, void *bp,
			 RF_RaidAccessFlags_t flags,
			 RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *unblockNode, *termNode, *commitNode;
	RF_DagNode_t *wndNode, *wmirNode;
	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
	int     nWndNodes, nWmirNodes, i;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda, *pdaP;
	RF_StripeNum_t parityStripeID;

	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);
#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 write DAG]\n");
	}
#endif
	dag_h->creator = "RaidOneWriteDAG";

	/* 2 implies access not SU aligned */
	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
	nWndNodes = (asmap->physInfo->next) ? 2 : 1;

	/* degraded mode: suppress the write to whichever copy has failed */
	if (asmap->numDataFailed == 1)
		nWndNodes--;
	if (asmap->numParityFailed == 1)
		nWmirNodes--;

	/*
	 * Allocate the Wnd (primary copy) and Wmir (secondary copy) write
	 * nodes plus commit, unblock, and terminator nodes.  Each
	 * rf_AllocDAGNode() is prepended to dag_h->nodes, so the head
	 * pointer captured after a batch of allocations names the first node
	 * of that batch; the remaining batch members are reached through
	 * list_next.
	 *
	 * total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
	 * + terminator)
	 */
	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode(raidPtr);
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNode = dag_h->nodes;

	for (i = 0; i < nWmirNodes; i++) {
		tmpNode = rf_AllocDAGNode(raidPtr);
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wmirNode = dag_h->nodes;

	commitNode = rf_AllocDAGNode(raidPtr);
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	unblockNode = rf_AllocDAGNode(raidPtr);
	unblockNode->list_next = dag_h->nodes;
	dag_h->nodes = unblockNode;

	termNode = rf_AllocDAGNode(raidPtr);
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	/* this dag can commit immediately: no pre-commit (read/xor) phase */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/*
	 * Initialize the commit, unblock, and term nodes.  Commit fans out
	 * to all writes; unblock collects them; term ends the DAG.
	 */
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
		    0, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
		    0, 0, dag_h, "Nil", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
		    dag_h, "Trm", allocList);

	/*
	 * Initialize the wnd nodes: one disk write (Wpd) per pda of the
	 * primary copy, writing that pda's own buffer.
	 */
	if (nWndNodes > 0) {
		pda = asmap->physInfo;
		tmpwndNode = wndNode;
		for (i = 0; i < nWndNodes; i++) {
			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wpd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwndNode->params[0].p = pda;
			tmpwndNode->params[1].p = pda->bufPtr;
			tmpwndNode->params[2].v = parityStripeID;
			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			tmpwndNode = tmpwndNode->list_next;
		}
		RF_ASSERT(pda == NULL);
	}
	/*
	 * Initialize the mirror nodes: one disk write (Wsd) per pda pair,
	 * writing the primary copy's data buffer (physInfo pda) to the
	 * secondary copy's location (parityInfo pda).  physInfo and
	 * parityInfo are walked in lockstep, so they are expected to have
	 * the same number of pdas.
	 */
	if (nWmirNodes > 0) {
		pda = asmap->physInfo;
		pdaP = asmap->parityInfo;
		tmpwmirNode = wmirNode;
		for (i = 0; i < nWmirNodes; i++) {
			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wsd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwmirNode->params[0].p = pdaP;	/* destination: secondary copy */
			tmpwmirNode->params[1].p = pda->bufPtr;	/* source: primary's data buffer */
			tmpwmirNode->params[2].v = parityStripeID;
			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			pdaP = pdaP->next;
			tmpwmirNode = tmpwmirNode->list_next;
		}
		RF_ASSERT(pda == NULL);
		RF_ASSERT(pdaP == NULL);
	}
	/* link the header node to the commit node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 0);
	dag_h->succedents[0] = commitNode;

	/*
	 * Link the commit node to the write nodes: Wnd nodes occupy
	 * succedent slots [0, nWndNodes), Wmir nodes follow.
	 */
	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numAntecedents == 1);
		commitNode->succedents[i] = tmpwndNode;
		tmpwndNode->antecedents[0] = commitNode;
		tmpwndNode->antType[0] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
		tmpwmirNode->antecedents[0] = commitNode;
		tmpwmirNode->antType[0] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/*
	 * Link the write nodes to the unblock node, using the same slot
	 * layout as above (Wnd first, then Wmir).
	 */
	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numSuccedents == 1);
		tmpwndNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i] = tmpwndNode;
		unblockNode->antType[i] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
		tmpwmirNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
		unblockNode->antType[i + nWndNodes] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the unblock node to the term node */
	RF_ASSERT(unblockNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	unblockNode->succedents[0] = termNode;
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
}
1366