1/*	$NetBSD: rf_netbsdkintf.c,v 1.295.6.1 2012/03/21 16:14:57 riz Exp $	*/
2
3/*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 *      The Regents of the University of California.  All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 *      @(#)cd.c        8.2 (Berkeley) 11/16/93
68 */
69
70/*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
89 *  School of Computer Science
90 *  Carnegie Mellon University
91 *  Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97/***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103#include <sys/cdefs.h>
104__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.295.6.1 2012/03/21 16:14:57 riz Exp $");
105
106#ifdef _KERNEL_OPT
107#include "opt_compat_netbsd.h"
108#include "opt_raid_autoconfig.h"
109#include "raid.h"
110#endif
111
112#include <sys/param.h>
113#include <sys/errno.h>
114#include <sys/pool.h>
115#include <sys/proc.h>
116#include <sys/queue.h>
117#include <sys/disk.h>
118#include <sys/device.h>
119#include <sys/stat.h>
120#include <sys/ioctl.h>
121#include <sys/fcntl.h>
122#include <sys/systm.h>
123#include <sys/vnode.h>
124#include <sys/disklabel.h>
125#include <sys/conf.h>
126#include <sys/buf.h>
127#include <sys/bufq.h>
128#include <sys/reboot.h>
129#include <sys/kauth.h>
130
131#include <prop/proplib.h>
132
133#include <dev/raidframe/raidframevar.h>
134#include <dev/raidframe/raidframeio.h>
135#include <dev/raidframe/rf_paritymap.h>
136
137#include "rf_raid.h"
138#include "rf_copyback.h"
139#include "rf_dag.h"
140#include "rf_dagflags.h"
141#include "rf_desc.h"
142#include "rf_diskqueue.h"
143#include "rf_etimer.h"
144#include "rf_general.h"
145#include "rf_kintf.h"
146#include "rf_options.h"
147#include "rf_driver.h"
148#include "rf_parityscan.h"
149#include "rf_threadstuff.h"
150
151#ifdef COMPAT_50
152#include "rf_compat50.h"
153#endif
154
155#ifdef DEBUG
156int     rf_kdebug_level = 0;
157#define db1_printf(a) if (rf_kdebug_level > 0) printf a
158#else				/* DEBUG */
159#define db1_printf(a) { }
160#endif				/* DEBUG */
161
162static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
163
164#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165static rf_declare_mutex2(rf_sparet_wait_mutex);
166static rf_declare_cond2(rf_sparet_wait_cv);
167static rf_declare_cond2(rf_sparet_resp_cv);
168
169static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
170						 * spare table */
171static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
172						 * installation process */
173#endif
174
175MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177/* prototypes */
178static void KernelWakeupFunc(struct buf *);
179static void InitBP(struct buf *, struct vnode *, unsigned,
180    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181    void *, int, struct proc *);
182static void raidinit(RF_Raid_t *);
183
184void raidattach(int);
185static int raid_match(device_t, cfdata_t, void *);
186static void raid_attach(device_t, device_t, void *);
187static int raid_detach(device_t, int);
188
189static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190    daddr_t, daddr_t);
191static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192    daddr_t, daddr_t, int);
193
194static int raidwrite_component_label(unsigned,
195    dev_t, struct vnode *, RF_ComponentLabel_t *);
196static int raidread_component_label(unsigned,
197    dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200dev_type_open(raidopen);
201dev_type_close(raidclose);
202dev_type_read(raidread);
203dev_type_write(raidwrite);
204dev_type_ioctl(raidioctl);
205dev_type_strategy(raidstrategy);
206dev_type_dump(raiddump);
207dev_type_size(raidsize);
208
209const struct bdevsw raid_bdevsw = {
210	raidopen, raidclose, raidstrategy, raidioctl,
211	raiddump, raidsize, D_DISK
212};
213
214const struct cdevsw raid_cdevsw = {
215	raidopen, raidclose, raidread, raidwrite, raidioctl,
216	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217};
218
219static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
220
221/* XXX Not sure if the following should be replacing the raidPtrs above,
222   or if it should be used in conjunction with that...
223*/
224
/* Per-unit driver state; one of these exists for each configured raid unit. */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle for this unit */
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
234/* sc_flags */
235#define RAIDF_INITED	0x01	/* unit has been initialized */
236#define RAIDF_WLABEL	0x02	/* label area is writable */
237#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
238#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
239#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
240#define RAIDF_LOCKED	0x80	/* unit is locked */
241
242#define	raidunit(x)	DISKUNIT(x)
243int numraid = 0;
244
245extern struct cfdriver raid_cd;
246CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
247    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
248    DVF_DETACH_SHUTDOWN);
249
250/*
251 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
252 * Be aware that large numbers can allow the driver to consume a lot of
253 * kernel memory, especially on writes, and in degraded mode reads.
254 *
255 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
256 * a single 64K write will typically require 64K for the old data,
257 * 64K for the old parity, and 64K for the new parity, for a total
258 * of 192K (if the parity buffer is not re-used immediately).
259 * Even it if is used immediately, that's still 128K, which when multiplied
260 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
261 *
262 * Now in degraded mode, for example, a 64K read on the above setup may
263 * require data reconstruction, which will require *all* of the 4 remaining
264 * disks to participate -- 4 * 32K/disk == 128K again.
265 */
266
267#ifndef RAIDOUTSTANDING
268#define RAIDOUTSTANDING   6
269#endif
270
271#define RAIDLABELDEV(dev)	\
272	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273
274/* declared here, and made public, for the benefit of KVM stuff.. */
275struct raid_softc *raid_softc;
276
277static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
278				     struct disklabel *);
279static void raidgetdisklabel(dev_t);
280static void raidmakedisklabel(struct raid_softc *);
281
282static int raidlock(struct raid_softc *);
283static void raidunlock(struct raid_softc *);
284
285static int raid_detach_unlocked(struct raid_softc *);
286
287static void rf_markalldirty(RF_Raid_t *);
288static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
289
290void rf_ReconThread(struct rf_recon_req *);
291void rf_RewriteParityThread(RF_Raid_t *raidPtr);
292void rf_CopybackThread(RF_Raid_t *raidPtr);
293void rf_ReconstructInPlaceThread(struct rf_recon_req *);
294int rf_autoconfig(device_t);
295void rf_buildroothack(RF_ConfigSet_t *);
296
297RF_AutoConfig_t *rf_find_raid_components(void);
298RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
299static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
300int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
301void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
302int rf_set_autoconfig(RF_Raid_t *, int);
303int rf_set_rootpartition(RF_Raid_t *, int);
304void rf_release_all_vps(RF_ConfigSet_t *);
305void rf_cleanup_config_set(RF_ConfigSet_t *);
306int rf_have_enough_components(RF_ConfigSet_t *);
307int rf_auto_config_set(RF_ConfigSet_t *, int *);
308static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
309
310/*
311 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
312 * Note that this is overridden by having RAID_AUTOCONFIG as an option
313 * in the kernel config file.
314 */
315#ifdef RAID_AUTOCONFIG
316int raidautoconfig = 1;
317#else
318int raidautoconfig = 0;
319#endif
320static bool raidautoconfigdone = false;
321
322struct RF_Pools_s rf_pools;
323
/*
 * raidattach:
 *
 *	Pseudo-device attach routine.  Called once at boot with the number
 *	of units requested in the kernel configuration.  Allocates the
 *	global raidPtrs[] and raid_softc[] arrays, boots the RAIDframe
 *	core, attaches the autoconf glue, and registers a finalizer that
 *	performs component auto-configuration after all real hardware has
 *	been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization for the spare-table installation protocol. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
		malloc(num * sizeof(struct raid_softc),
		       M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Partial failure: shrink numraid so the already
			 * initialized units remain usable. */
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
403
404int
405rf_autoconfig(device_t self)
406{
407	RF_AutoConfig_t *ac_list;
408	RF_ConfigSet_t *config_sets;
409
410	if (!raidautoconfig || raidautoconfigdone == true)
411		return (0);
412
413	/* XXX This code can only be run once. */
414	raidautoconfigdone = true;
415
416	/* 1. locate all RAID components on the system */
417	aprint_debug("Searching for RAID components...\n");
418	ac_list = rf_find_raid_components();
419
420	/* 2. Sort them into their respective sets. */
421	config_sets = rf_create_auto_sets(ac_list);
422
423	/*
424	 * 3. Evaluate each set andconfigure the valid ones.
425	 * This gets done in rf_buildroothack().
426	 */
427	rf_buildroothack(config_sets);
428
429	return 1;
430}
431
/*
 * rf_buildroothack:
 *
 *	Walk the list of auto-detected configuration sets, configuring
 *	each one that has enough components and is marked for
 *	autoconfiguration.  Afterwards, try to determine whether one of
 *	the newly configured RAID sets should become the root device
 *	(booted_device), falling back to RB_ASKNAME when the choice is
 *	ambiguous.  Consumes (frees) the config_sets list.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* Save the link now; rf_cleanup_config_set() frees cset. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		/* Exactly one rootable set: it becomes the root device. */
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/*
		 * Disambiguate by finding the RAID set that contains the
		 * component we actually booted from.
		 */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* Strip the "/dev/" prefix before comparing
				 * against the autoconf device name. */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				       raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
526
527
528int
529raidsize(dev_t dev)
530{
531	struct raid_softc *rs;
532	struct disklabel *lp;
533	int     part, unit, omask, size;
534
535	unit = raidunit(dev);
536	if (unit >= numraid)
537		return (-1);
538	rs = &raid_softc[unit];
539
540	if ((rs->sc_flags & RAIDF_INITED) == 0)
541		return (-1);
542
543	part = DISKPART(dev);
544	omask = rs->sc_dkdev.dk_openmask & (1 << part);
545	lp = rs->sc_dkdev.dk_label;
546
547	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
548		return (-1);
549
550	if (lp->d_partitions[part].p_fstype != FS_SWAP)
551		size = -1;
552	else
553		size = lp->d_partitions[part].p_size *
554		    (lp->d_secsize / DEV_BSIZE);
555
556	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
557		return (-1);
558
559	return (size);
560
561}
562
/*
 * raiddump:
 *
 *	Kernel crash-dump entry point.  Only RAID 1 sets (one data
 *	column, one parity column) are supported.  Picks a single live
 *	component (or used spare) and forwards the dump to that
 *	component's underlying block device.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* The dump must be a whole number of device blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse dumps that would run past the end of the raid device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
700/* ARGSUSED */
/*
 * raidopen:
 *
 *	Open entry point (both block and character).  Validates the unit
 *	and partition, refuses non-raw opens while wedges exist, reloads
 *	the disklabel on the first open, records the open in the
 *	appropriate openmask, and marks components dirty on the first
 *	open of an initialized set.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens while the unit is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
786/* ARGSUSED */
/*
 * raidclose:
 *
 *	Close entry point.  Clears the partition's bit in the
 *	appropriate openmask; on the last close of an initialized set,
 *	writes final (clean) component labels.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
836
837void
838raidstrategy(struct buf *bp)
839{
840	unsigned int raidID = raidunit(bp->b_dev);
841	RF_Raid_t *raidPtr;
842	struct raid_softc *rs = &raid_softc[raidID];
843	int     wlabel;
844
845	if ((rs->sc_flags & RAIDF_INITED) ==0) {
846		bp->b_error = ENXIO;
847		goto done;
848	}
849	if (raidID >= numraid || !raidPtrs[raidID]) {
850		bp->b_error = ENODEV;
851		goto done;
852	}
853	raidPtr = raidPtrs[raidID];
854	if (!raidPtr->valid) {
855		bp->b_error = ENODEV;
856		goto done;
857	}
858	if (bp->b_bcount == 0) {
859		db1_printf(("b_bcount is zero..\n"));
860		goto done;
861	}
862
863	/*
864	 * Do bounds checking and adjust transfer.  If there's an
865	 * error, the bounds check will flag that for us.
866	 */
867
868	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
869	if (DISKPART(bp->b_dev) == RAW_PART) {
870		uint64_t size; /* device size in DEV_BSIZE unit */
871
872		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
873			size = raidPtr->totalSectors <<
874			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
875		} else {
876			size = raidPtr->totalSectors >>
877			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
878		}
879		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
880			goto done;
881		}
882	} else {
883		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
884			db1_printf(("Bounds check failed!!:%d %d\n",
885				(int) bp->b_blkno, (int) wlabel));
886			goto done;
887		}
888	}
889
890	rf_lock_mutex2(raidPtr->iodone_lock);
891
892	bp->b_resid = 0;
893
894	/* stuff it onto our queue */
895	bufq_put(rs->buf_queue, bp);
896
897	/* scheduled the IO to happen at the next convenient time */
898	rf_signal_cond2(raidPtr->iodone_cv);
899	rf_unlock_mutex2(raidPtr->iodone_lock);
900
901	return;
902
903done:
904	bp->b_resid = bp->b_bcount;
905	biodone(bp);
906}
907/* ARGSUSED */
908int
909raidread(dev_t dev, struct uio *uio, int flags)
910{
911	int     unit = raidunit(dev);
912	struct raid_softc *rs;
913
914	if (unit >= numraid)
915		return (ENXIO);
916	rs = &raid_softc[unit];
917
918	if ((rs->sc_flags & RAIDF_INITED) == 0)
919		return (ENXIO);
920
921	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
922
923}
924/* ARGSUSED */
925int
926raidwrite(dev_t dev, struct uio *uio, int flags)
927{
928	int     unit = raidunit(dev);
929	struct raid_softc *rs;
930
931	if (unit >= numraid)
932		return (ENXIO);
933	rs = &raid_softc[unit];
934
935	if ((rs->sc_flags & RAIDF_INITED) == 0)
936		return (ENXIO);
937
938	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
939
940}
941
/*
 * raid_detach_unlocked:
 *
 *	Tear down a raid unit.  Caller must already hold the unit lock
 *	(raidlock).  Fails with EBUSY while any partition is open;
 *	otherwise shuts down the RAIDframe set (if initialized) and
 *	detaches/destroys the generic disk structures.  Returns 0 on
 *	success or an errno.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = raidPtrs[device_unit(rs->sc_dev)];

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
973
974int
975raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
976{
977	int     unit = raidunit(dev);
978	int     error = 0;
979	int     part, pmask, s;
980	cfdata_t cf;
981	struct raid_softc *rs;
982	RF_Config_t *k_cfg, *u_cfg;
983	RF_Raid_t *raidPtr;
984	RF_RaidDisk_t *diskPtr;
985	RF_AccTotals_t *totals;
986	RF_DeviceConfig_t *d_cfg, **ucfgp;
987	u_char *specific_buf;
988	int retcode = 0;
989	int column;
990/*	int raidid; */
991	struct rf_recon_req *rrcopy, *rr;
992	RF_ComponentLabel_t *clabel;
993	RF_ComponentLabel_t *ci_label;
994	RF_ComponentLabel_t **clabel_ptr;
995	RF_SingleComponent_t *sparePtr,*componentPtr;
996	RF_SingleComponent_t component;
997	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
998	int i, j, d;
999#ifdef __HAVE_OLD_DISKLABEL
1000	struct disklabel newlabel;
1001#endif
1002	struct dkwedge_info *dkw;
1003
1004	if (unit >= numraid)
1005		return (ENXIO);
1006	rs = &raid_softc[unit];
1007	raidPtr = raidPtrs[unit];
1008
1009	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1010		(int) DISKPART(dev), (int) unit, cmd));
1011
1012	/* Must be open for writes for these commands... */
1013	switch (cmd) {
1014#ifdef DIOCGSECTORSIZE
1015	case DIOCGSECTORSIZE:
1016		*(u_int *)data = raidPtr->bytesPerSector;
1017		return 0;
1018	case DIOCGMEDIASIZE:
1019		*(off_t *)data =
1020		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1021		return 0;
1022#endif
1023	case DIOCSDINFO:
1024	case DIOCWDINFO:
1025#ifdef __HAVE_OLD_DISKLABEL
1026	case ODIOCWDINFO:
1027	case ODIOCSDINFO:
1028#endif
1029	case DIOCWLABEL:
1030	case DIOCAWEDGE:
1031	case DIOCDWEDGE:
1032	case DIOCSSTRATEGY:
1033		if ((flag & FWRITE) == 0)
1034			return (EBADF);
1035	}
1036
1037	/* Must be initialized for these... */
1038	switch (cmd) {
1039	case DIOCGDINFO:
1040	case DIOCSDINFO:
1041	case DIOCWDINFO:
1042#ifdef __HAVE_OLD_DISKLABEL
1043	case ODIOCGDINFO:
1044	case ODIOCWDINFO:
1045	case ODIOCSDINFO:
1046	case ODIOCGDEFLABEL:
1047#endif
1048	case DIOCGPART:
1049	case DIOCWLABEL:
1050	case DIOCGDEFLABEL:
1051	case DIOCAWEDGE:
1052	case DIOCDWEDGE:
1053	case DIOCLWEDGES:
1054	case DIOCCACHESYNC:
1055	case RAIDFRAME_SHUTDOWN:
1056	case RAIDFRAME_REWRITEPARITY:
1057	case RAIDFRAME_GET_INFO:
1058	case RAIDFRAME_RESET_ACCTOTALS:
1059	case RAIDFRAME_GET_ACCTOTALS:
1060	case RAIDFRAME_KEEP_ACCTOTALS:
1061	case RAIDFRAME_GET_SIZE:
1062	case RAIDFRAME_FAIL_DISK:
1063	case RAIDFRAME_COPYBACK:
1064	case RAIDFRAME_CHECK_RECON_STATUS:
1065	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1066	case RAIDFRAME_GET_COMPONENT_LABEL:
1067	case RAIDFRAME_SET_COMPONENT_LABEL:
1068	case RAIDFRAME_ADD_HOT_SPARE:
1069	case RAIDFRAME_REMOVE_HOT_SPARE:
1070	case RAIDFRAME_INIT_LABELS:
1071	case RAIDFRAME_REBUILD_IN_PLACE:
1072	case RAIDFRAME_CHECK_PARITY:
1073	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1074	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1075	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1076	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1077	case RAIDFRAME_SET_AUTOCONFIG:
1078	case RAIDFRAME_SET_ROOT:
1079	case RAIDFRAME_DELETE_COMPONENT:
1080	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1081	case RAIDFRAME_PARITYMAP_STATUS:
1082	case RAIDFRAME_PARITYMAP_GET_DISABLE:
1083	case RAIDFRAME_PARITYMAP_SET_DISABLE:
1084	case RAIDFRAME_PARITYMAP_SET_PARAMS:
1085	case DIOCGSTRATEGY:
1086	case DIOCSSTRATEGY:
1087		if ((rs->sc_flags & RAIDF_INITED) == 0)
1088			return (ENXIO);
1089	}
1090
1091	switch (cmd) {
1092#ifdef COMPAT_50
1093	case RAIDFRAME_GET_INFO50:
1094		return rf_get_info50(raidPtr, data);
1095
1096	case RAIDFRAME_CONFIGURE50:
1097		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1098			return retcode;
1099		goto config;
1100#endif
1101		/* configure the system */
1102	case RAIDFRAME_CONFIGURE:
1103
1104		if (raidPtr->valid) {
1105			/* There is a valid RAID set running on this unit! */
1106			printf("raid%d: Device already configured!\n",unit);
1107			return(EINVAL);
1108		}
1109
1110		/* copy-in the configuration information */
1111		/* data points to a pointer to the configuration structure */
1112
1113		u_cfg = *((RF_Config_t **) data);
1114		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1115		if (k_cfg == NULL) {
1116			return (ENOMEM);
1117		}
1118		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1119		if (retcode) {
1120			RF_Free(k_cfg, sizeof(RF_Config_t));
1121			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1122				retcode));
1123			return (retcode);
1124		}
1125		goto config;
1126	config:
1127		/* allocate a buffer for the layout-specific data, and copy it
1128		 * in */
1129		if (k_cfg->layoutSpecificSize) {
1130			if (k_cfg->layoutSpecificSize > 10000) {
1131				/* sanity check */
1132				RF_Free(k_cfg, sizeof(RF_Config_t));
1133				return (EINVAL);
1134			}
1135			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1136			    (u_char *));
1137			if (specific_buf == NULL) {
1138				RF_Free(k_cfg, sizeof(RF_Config_t));
1139				return (ENOMEM);
1140			}
1141			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1142			    k_cfg->layoutSpecificSize);
1143			if (retcode) {
1144				RF_Free(k_cfg, sizeof(RF_Config_t));
1145				RF_Free(specific_buf,
1146					k_cfg->layoutSpecificSize);
1147				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1148					retcode));
1149				return (retcode);
1150			}
1151		} else
1152			specific_buf = NULL;
1153		k_cfg->layoutSpecific = specific_buf;
1154
1155		/* should do some kind of sanity check on the configuration.
1156		 * Store the sum of all the bytes in the last byte? */
1157
1158		/* configure the system */
1159
1160		/*
1161		 * Clear the entire RAID descriptor, just to make sure
1162		 *  there is no stale data left in the case of a
1163		 *  reconfiguration
1164		 */
1165		memset(raidPtr, 0, sizeof(*raidPtr));
1166		raidPtr->raidid = unit;
1167
1168		retcode = rf_Configure(raidPtr, k_cfg, NULL);
1169
1170		if (retcode == 0) {
1171
1172			/* allow this many simultaneous IO's to
1173			   this RAID device */
1174			raidPtr->openings = RAIDOUTSTANDING;
1175
1176			raidinit(raidPtr);
1177			rf_markalldirty(raidPtr);
1178		}
1179		/* free the buffers.  No return code here. */
1180		if (k_cfg->layoutSpecificSize) {
1181			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1182		}
1183		RF_Free(k_cfg, sizeof(RF_Config_t));
1184
1185		return (retcode);
1186
1187		/* shutdown the system */
1188	case RAIDFRAME_SHUTDOWN:
1189
1190		part = DISKPART(dev);
1191		pmask = (1 << part);
1192
1193		if ((error = raidlock(rs)) != 0)
1194			return (error);
1195
1196		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1197		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1198			(rs->sc_dkdev.dk_copenmask & pmask)))
1199			retcode = EBUSY;
1200		else {
1201			rs->sc_flags |= RAIDF_SHUTDOWN;
1202			rs->sc_dkdev.dk_copenmask &= ~pmask;
1203			rs->sc_dkdev.dk_bopenmask &= ~pmask;
1204			rs->sc_dkdev.dk_openmask &= ~pmask;
1205			retcode = 0;
1206		}
1207
1208		raidunlock(rs);
1209
1210		if (retcode != 0)
1211			return retcode;
1212
1213		/* free the pseudo device attach bits */
1214
1215		cf = device_cfdata(rs->sc_dev);
1216		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1217			free(cf, M_RAIDFRAME);
1218
1219		return (retcode);
1220	case RAIDFRAME_GET_COMPONENT_LABEL:
1221		clabel_ptr = (RF_ComponentLabel_t **) data;
1222		/* need to read the component label for the disk indicated
1223		   by row,column in clabel */
1224
1225		/*
1226		 * Perhaps there should be an option to skip the in-core
1227		 * copy and hit the disk, as with disklabel(8).
1228		 */
1229		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1230
1231		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1232
1233		if (retcode) {
1234			RF_Free(clabel, sizeof(*clabel));
1235			return retcode;
1236		}
1237
1238		clabel->row = 0; /* Don't allow looking at anything else.*/
1239
1240		column = clabel->column;
1241
1242		if ((column < 0) || (column >= raidPtr->numCol +
1243		    raidPtr->numSpare)) {
1244			RF_Free(clabel, sizeof(*clabel));
1245			return EINVAL;
1246		}
1247
1248		RF_Free(clabel, sizeof(*clabel));
1249
1250		clabel = raidget_component_label(raidPtr, column);
1251
1252		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1253
1254#if 0
1255	case RAIDFRAME_SET_COMPONENT_LABEL:
1256		clabel = (RF_ComponentLabel_t *) data;
1257
1258		/* XXX check the label for valid stuff... */
1259		/* Note that some things *should not* get modified --
1260		   the user should be re-initing the labels instead of
1261		   trying to patch things.
1262		   */
1263
1264		raidid = raidPtr->raidid;
1265#ifdef DEBUG
1266		printf("raid%d: Got component label:\n", raidid);
1267		printf("raid%d: Version: %d\n", raidid, clabel->version);
1268		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1269		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1270		printf("raid%d: Column: %d\n", raidid, clabel->column);
1271		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1272		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1273		printf("raid%d: Status: %d\n", raidid, clabel->status);
1274#endif
1275		clabel->row = 0;
1276		column = clabel->column;
1277
1278		if ((column < 0) || (column >= raidPtr->numCol)) {
1279			return(EINVAL);
1280		}
1281
1282		/* XXX this isn't allowed to do anything for now :-) */
1283
1284		/* XXX and before it is, we need to fill in the rest
1285		   of the fields!?!?!?! */
1286		memcpy(raidget_component_label(raidPtr, column),
1287		    clabel, sizeof(*clabel));
1288		raidflush_component_label(raidPtr, column);
1289		return (0);
1290#endif
1291
1292	case RAIDFRAME_INIT_LABELS:
1293		clabel = (RF_ComponentLabel_t *) data;
1294		/*
1295		   we only want the serial number from
1296		   the above.  We get all the rest of the information
1297		   from the config that was used to create this RAID
1298		   set.
1299		   */
1300
1301		raidPtr->serial_number = clabel->serial_number;
1302
1303		for(column=0;column<raidPtr->numCol;column++) {
1304			diskPtr = &raidPtr->Disks[column];
1305			if (!RF_DEAD_DISK(diskPtr->status)) {
1306				ci_label = raidget_component_label(raidPtr,
1307				    column);
1308				/* Zeroing this is important. */
1309				memset(ci_label, 0, sizeof(*ci_label));
1310				raid_init_component_label(raidPtr, ci_label);
1311				ci_label->serial_number =
1312				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
1314				rf_component_label_set_partitionsize(ci_label,
1315				    diskPtr->partitionSize);
1316				ci_label->column = column;
1317				raidflush_component_label(raidPtr, column);
1318			}
1319			/* XXXjld what about the spares? */
1320		}
1321
1322		return (retcode);
1323	case RAIDFRAME_SET_AUTOCONFIG:
1324		d = rf_set_autoconfig(raidPtr, *(int *) data);
1325		printf("raid%d: New autoconfig value is: %d\n",
1326		       raidPtr->raidid, d);
1327		*(int *) data = d;
1328		return (retcode);
1329
1330	case RAIDFRAME_SET_ROOT:
1331		d = rf_set_rootpartition(raidPtr, *(int *) data);
1332		printf("raid%d: New rootpartition value is: %d\n",
1333		       raidPtr->raidid, d);
1334		*(int *) data = d;
1335		return (retcode);
1336
1337		/* initialize all parity */
1338	case RAIDFRAME_REWRITEPARITY:
1339
1340		if (raidPtr->Layout.map->faultsTolerated == 0) {
1341			/* Parity for RAID 0 is trivially correct */
1342			raidPtr->parity_good = RF_RAID_CLEAN;
1343			return(0);
1344		}
1345
1346		if (raidPtr->parity_rewrite_in_progress == 1) {
1347			/* Re-write is already in progress! */
1348			return(EINVAL);
1349		}
1350
1351		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1352					   rf_RewriteParityThread,
1353					   raidPtr,"raid_parity");
1354		return (retcode);
1355
1356
1357	case RAIDFRAME_ADD_HOT_SPARE:
1358		sparePtr = (RF_SingleComponent_t *) data;
1359		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1360		retcode = rf_add_hot_spare(raidPtr, &component);
1361		return(retcode);
1362
1363	case RAIDFRAME_REMOVE_HOT_SPARE:
1364		return(retcode);
1365
1366	case RAIDFRAME_DELETE_COMPONENT:
1367		componentPtr = (RF_SingleComponent_t *)data;
1368		memcpy( &component, componentPtr,
1369			sizeof(RF_SingleComponent_t));
1370		retcode = rf_delete_component(raidPtr, &component);
1371		return(retcode);
1372
1373	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1374		componentPtr = (RF_SingleComponent_t *)data;
1375		memcpy( &component, componentPtr,
1376			sizeof(RF_SingleComponent_t));
1377		retcode = rf_incorporate_hot_spare(raidPtr, &component);
1378		return(retcode);
1379
1380	case RAIDFRAME_REBUILD_IN_PLACE:
1381
1382		if (raidPtr->Layout.map->faultsTolerated == 0) {
1383			/* Can't do this on a RAID 0!! */
1384			return(EINVAL);
1385		}
1386
1387		if (raidPtr->recon_in_progress == 1) {
1388			/* a reconstruct is already in progress! */
1389			return(EINVAL);
1390		}
1391
1392		componentPtr = (RF_SingleComponent_t *) data;
1393		memcpy( &component, componentPtr,
1394			sizeof(RF_SingleComponent_t));
1395		component.row = 0; /* we don't support any more */
1396		column = component.column;
1397
1398		if ((column < 0) || (column >= raidPtr->numCol)) {
1399			return(EINVAL);
1400		}
1401
1402		rf_lock_mutex2(raidPtr->mutex);
1403		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1404		    (raidPtr->numFailures > 0)) {
1405			/* XXX 0 above shouldn't be constant!!! */
1406			/* some component other than this has failed.
1407			   Let's not make things worse than they already
1408			   are... */
1409			printf("raid%d: Unable to reconstruct to disk at:\n",
1410			       raidPtr->raidid);
1411			printf("raid%d:     Col: %d   Too many failures.\n",
1412			       raidPtr->raidid, column);
1413			rf_unlock_mutex2(raidPtr->mutex);
1414			return (EINVAL);
1415		}
1416		if (raidPtr->Disks[column].status ==
1417		    rf_ds_reconstructing) {
1418			printf("raid%d: Unable to reconstruct to disk at:\n",
1419			       raidPtr->raidid);
1420			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
1421
1422			rf_unlock_mutex2(raidPtr->mutex);
1423			return (EINVAL);
1424		}
1425		if (raidPtr->Disks[column].status == rf_ds_spared) {
1426			rf_unlock_mutex2(raidPtr->mutex);
1427			return (EINVAL);
1428		}
1429		rf_unlock_mutex2(raidPtr->mutex);
1430
1431		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1432		if (rrcopy == NULL)
1433			return(ENOMEM);
1434
1435		rrcopy->raidPtr = (void *) raidPtr;
1436		rrcopy->col = column;
1437
1438		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1439					   rf_ReconstructInPlaceThread,
1440					   rrcopy,"raid_reconip");
1441		return(retcode);
1442
1443	case RAIDFRAME_GET_INFO:
1444		if (!raidPtr->valid)
1445			return (ENODEV);
1446		ucfgp = (RF_DeviceConfig_t **) data;
1447		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1448			  (RF_DeviceConfig_t *));
1449		if (d_cfg == NULL)
1450			return (ENOMEM);
1451		d_cfg->rows = 1; /* there is only 1 row now */
1452		d_cfg->cols = raidPtr->numCol;
1453		d_cfg->ndevs = raidPtr->numCol;
1454		if (d_cfg->ndevs >= RF_MAX_DISKS) {
1455			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1456			return (ENOMEM);
1457		}
1458		d_cfg->nspares = raidPtr->numSpare;
1459		if (d_cfg->nspares >= RF_MAX_DISKS) {
1460			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1461			return (ENOMEM);
1462		}
1463		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1464		d = 0;
1465		for (j = 0; j < d_cfg->cols; j++) {
1466			d_cfg->devs[d] = raidPtr->Disks[j];
1467			d++;
1468		}
1469		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1470			d_cfg->spares[i] = raidPtr->Disks[j];
1471		}
1472		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1473		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1474
1475		return (retcode);
1476
1477	case RAIDFRAME_CHECK_PARITY:
1478		*(int *) data = raidPtr->parity_good;
1479		return (0);
1480
1481	case RAIDFRAME_PARITYMAP_STATUS:
1482		if (rf_paritymap_ineligible(raidPtr))
1483			return EINVAL;
1484		rf_paritymap_status(raidPtr->parity_map,
1485		    (struct rf_pmstat *)data);
1486		return 0;
1487
1488	case RAIDFRAME_PARITYMAP_SET_PARAMS:
1489		if (rf_paritymap_ineligible(raidPtr))
1490			return EINVAL;
1491		if (raidPtr->parity_map == NULL)
1492			return ENOENT; /* ??? */
1493		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1494			(struct rf_pmparams *)data, 1))
1495			return EINVAL;
1496		return 0;
1497
1498	case RAIDFRAME_PARITYMAP_GET_DISABLE:
1499		if (rf_paritymap_ineligible(raidPtr))
1500			return EINVAL;
1501		*(int *) data = rf_paritymap_get_disable(raidPtr);
1502		return 0;
1503
1504	case RAIDFRAME_PARITYMAP_SET_DISABLE:
1505		if (rf_paritymap_ineligible(raidPtr))
1506			return EINVAL;
1507		rf_paritymap_set_disable(raidPtr, *(int *)data);
1508		/* XXX should errors be passed up? */
1509		return 0;
1510
1511	case RAIDFRAME_RESET_ACCTOTALS:
1512		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1513		return (0);
1514
1515	case RAIDFRAME_GET_ACCTOTALS:
1516		totals = (RF_AccTotals_t *) data;
1517		*totals = raidPtr->acc_totals;
1518		return (0);
1519
1520	case RAIDFRAME_KEEP_ACCTOTALS:
1521		raidPtr->keep_acc_totals = *(int *)data;
1522		return (0);
1523
1524	case RAIDFRAME_GET_SIZE:
1525		*(int *) data = raidPtr->totalSectors;
1526		return (0);
1527
1528		/* fail a disk & optionally start reconstruction */
1529	case RAIDFRAME_FAIL_DISK:
1530
1531		if (raidPtr->Layout.map->faultsTolerated == 0) {
1532			/* Can't do this on a RAID 0!! */
1533			return(EINVAL);
1534		}
1535
1536		rr = (struct rf_recon_req *) data;
1537		rr->row = 0;
1538		if (rr->col < 0 || rr->col >= raidPtr->numCol)
1539			return (EINVAL);
1540
1541
1542		rf_lock_mutex2(raidPtr->mutex);
1543		if (raidPtr->status == rf_rs_reconstructing) {
1544			/* you can't fail a disk while we're reconstructing! */
1545			/* XXX wrong for RAID6 */
1546			rf_unlock_mutex2(raidPtr->mutex);
1547			return (EINVAL);
1548		}
1549		if ((raidPtr->Disks[rr->col].status ==
1550		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1551			/* some other component has failed.  Let's not make
1552			   things worse. XXX wrong for RAID6 */
1553			rf_unlock_mutex2(raidPtr->mutex);
1554			return (EINVAL);
1555		}
1556		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1557			/* Can't fail a spared disk! */
1558			rf_unlock_mutex2(raidPtr->mutex);
1559			return (EINVAL);
1560		}
1561		rf_unlock_mutex2(raidPtr->mutex);
1562
1563		/* make a copy of the recon request so that we don't rely on
1564		 * the user's buffer */
1565		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1566		if (rrcopy == NULL)
1567			return(ENOMEM);
1568		memcpy(rrcopy, rr, sizeof(*rr));
1569		rrcopy->raidPtr = (void *) raidPtr;
1570
1571		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1572					   rf_ReconThread,
1573					   rrcopy,"raid_recon");
1574		return (0);
1575
1576		/* invoke a copyback operation after recon on whatever disk
1577		 * needs it, if any */
1578	case RAIDFRAME_COPYBACK:
1579
1580		if (raidPtr->Layout.map->faultsTolerated == 0) {
1581			/* This makes no sense on a RAID 0!! */
1582			return(EINVAL);
1583		}
1584
1585		if (raidPtr->copyback_in_progress == 1) {
1586			/* Copyback is already in progress! */
1587			return(EINVAL);
1588		}
1589
1590		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1591					   rf_CopybackThread,
1592					   raidPtr,"raid_copyback");
1593		return (retcode);
1594
1595		/* return the percentage completion of reconstruction */
1596	case RAIDFRAME_CHECK_RECON_STATUS:
1597		if (raidPtr->Layout.map->faultsTolerated == 0) {
1598			/* This makes no sense on a RAID 0, so tell the
1599			   user it's done. */
1600			*(int *) data = 100;
1601			return(0);
1602		}
1603		if (raidPtr->status != rf_rs_reconstructing)
1604			*(int *) data = 100;
1605		else {
1606			if (raidPtr->reconControl->numRUsTotal > 0) {
1607				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1608			} else {
1609				*(int *) data = 0;
1610			}
1611		}
1612		return (0);
1613	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1614		progressInfoPtr = (RF_ProgressInfo_t **) data;
1615		if (raidPtr->status != rf_rs_reconstructing) {
1616			progressInfo.remaining = 0;
1617			progressInfo.completed = 100;
1618			progressInfo.total = 100;
1619		} else {
1620			progressInfo.total =
1621				raidPtr->reconControl->numRUsTotal;
1622			progressInfo.completed =
1623				raidPtr->reconControl->numRUsComplete;
1624			progressInfo.remaining = progressInfo.total -
1625				progressInfo.completed;
1626		}
1627		retcode = copyout(&progressInfo, *progressInfoPtr,
1628				  sizeof(RF_ProgressInfo_t));
1629		return (retcode);
1630
1631	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1632		if (raidPtr->Layout.map->faultsTolerated == 0) {
1633			/* This makes no sense on a RAID 0, so tell the
1634			   user it's done. */
1635			*(int *) data = 100;
1636			return(0);
1637		}
1638		if (raidPtr->parity_rewrite_in_progress == 1) {
1639			*(int *) data = 100 *
1640				raidPtr->parity_rewrite_stripes_done /
1641				raidPtr->Layout.numStripe;
1642		} else {
1643			*(int *) data = 100;
1644		}
1645		return (0);
1646
1647	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1648		progressInfoPtr = (RF_ProgressInfo_t **) data;
1649		if (raidPtr->parity_rewrite_in_progress == 1) {
1650			progressInfo.total = raidPtr->Layout.numStripe;
1651			progressInfo.completed =
1652				raidPtr->parity_rewrite_stripes_done;
1653			progressInfo.remaining = progressInfo.total -
1654				progressInfo.completed;
1655		} else {
1656			progressInfo.remaining = 0;
1657			progressInfo.completed = 100;
1658			progressInfo.total = 100;
1659		}
1660		retcode = copyout(&progressInfo, *progressInfoPtr,
1661				  sizeof(RF_ProgressInfo_t));
1662		return (retcode);
1663
1664	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1665		if (raidPtr->Layout.map->faultsTolerated == 0) {
1666			/* This makes no sense on a RAID 0 */
1667			*(int *) data = 100;
1668			return(0);
1669		}
1670		if (raidPtr->copyback_in_progress == 1) {
1671			*(int *) data = 100 * raidPtr->copyback_stripes_done /
1672				raidPtr->Layout.numStripe;
1673		} else {
1674			*(int *) data = 100;
1675		}
1676		return (0);
1677
1678	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1679		progressInfoPtr = (RF_ProgressInfo_t **) data;
1680		if (raidPtr->copyback_in_progress == 1) {
1681			progressInfo.total = raidPtr->Layout.numStripe;
1682			progressInfo.completed =
1683				raidPtr->copyback_stripes_done;
1684			progressInfo.remaining = progressInfo.total -
1685				progressInfo.completed;
1686		} else {
1687			progressInfo.remaining = 0;
1688			progressInfo.completed = 100;
1689			progressInfo.total = 100;
1690		}
1691		retcode = copyout(&progressInfo, *progressInfoPtr,
1692				  sizeof(RF_ProgressInfo_t));
1693		return (retcode);
1694
1695		/* the sparetable daemon calls this to wait for the kernel to
1696		 * need a spare table. this ioctl does not return until a
1697		 * spare table is needed. XXX -- calling mpsleep here in the
1698		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1699		 * -- I should either compute the spare table in the kernel,
1700		 * or have a different -- XXX XXX -- interface (a different
1701		 * character device) for delivering the table     -- XXX */
1702#if 0
1703	case RAIDFRAME_SPARET_WAIT:
1704		rf_lock_mutex2(rf_sparet_wait_mutex);
1705		while (!rf_sparet_wait_queue)
1706			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1707		waitreq = rf_sparet_wait_queue;
1708		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1709		rf_unlock_mutex2(rf_sparet_wait_mutex);
1710
1711		/* structure assignment */
1712		*((RF_SparetWait_t *) data) = *waitreq;
1713
1714		RF_Free(waitreq, sizeof(*waitreq));
1715		return (0);
1716
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1719	case RAIDFRAME_ABORT_SPARET_WAIT:
1720		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1721		waitreq->fcol = -1;
1722		rf_lock_mutex2(rf_sparet_wait_mutex);
1723		waitreq->next = rf_sparet_wait_queue;
1724		rf_sparet_wait_queue = waitreq;
1725		rf_broadcast_conf2(rf_sparet_wait_cv);
1726		rf_unlock_mutex2(rf_sparet_wait_mutex);
1727		return (0);
1728
1729		/* used by the spare table daemon to deliver a spare table
1730		 * into the kernel */
1731	case RAIDFRAME_SEND_SPARET:
1732
1733		/* install the spare table */
1734		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1735
1736		/* respond to the requestor.  the return status of the spare
1737		 * table installation is passed in the "fcol" field */
1738		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1739		waitreq->fcol = retcode;
1740		rf_lock_mutex2(rf_sparet_wait_mutex);
1741		waitreq->next = rf_sparet_resp_queue;
1742		rf_sparet_resp_queue = waitreq;
1743		rf_broadcast_cond2(rf_sparet_resp_cv);
1744		rf_unlock_mutex2(rf_sparet_wait_mutex);
1745
1746		return (retcode);
1747#endif
1748
1749	default:
1750		break; /* fall through to the os-specific code below */
1751
1752	}
1753
1754	if (!raidPtr->valid)
1755		return (EINVAL);
1756
1757	/*
1758	 * Add support for "regular" device ioctls here.
1759	 */
1760
1761	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1762	if (error != EPASSTHROUGH)
1763		return (error);
1764
1765	switch (cmd) {
1766	case DIOCGDINFO:
1767		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1768		break;
1769#ifdef __HAVE_OLD_DISKLABEL
1770	case ODIOCGDINFO:
1771		newlabel = *(rs->sc_dkdev.dk_label);
1772		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1773			return ENOTTY;
1774		memcpy(data, &newlabel, sizeof (struct olddisklabel));
1775		break;
1776#endif
1777
1778	case DIOCGPART:
1779		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1780		((struct partinfo *) data)->part =
1781		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1782		break;
1783
1784	case DIOCWDINFO:
1785	case DIOCSDINFO:
1786#ifdef __HAVE_OLD_DISKLABEL
1787	case ODIOCWDINFO:
1788	case ODIOCSDINFO:
1789#endif
1790	{
1791		struct disklabel *lp;
1792#ifdef __HAVE_OLD_DISKLABEL
1793		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1794			memset(&newlabel, 0, sizeof newlabel);
1795			memcpy(&newlabel, data, sizeof (struct olddisklabel));
1796			lp = &newlabel;
1797		} else
1798#endif
1799		lp = (struct disklabel *)data;
1800
1801		if ((error = raidlock(rs)) != 0)
1802			return (error);
1803
1804		rs->sc_flags |= RAIDF_LABELLING;
1805
1806		error = setdisklabel(rs->sc_dkdev.dk_label,
1807		    lp, 0, rs->sc_dkdev.dk_cpulabel);
1808		if (error == 0) {
1809			if (cmd == DIOCWDINFO
1810#ifdef __HAVE_OLD_DISKLABEL
1811			    || cmd == ODIOCWDINFO
1812#endif
1813			   )
1814				error = writedisklabel(RAIDLABELDEV(dev),
1815				    raidstrategy, rs->sc_dkdev.dk_label,
1816				    rs->sc_dkdev.dk_cpulabel);
1817		}
1818		rs->sc_flags &= ~RAIDF_LABELLING;
1819
1820		raidunlock(rs);
1821
1822		if (error)
1823			return (error);
1824		break;
1825	}
1826
1827	case DIOCWLABEL:
1828		if (*(int *) data != 0)
1829			rs->sc_flags |= RAIDF_WLABEL;
1830		else
1831			rs->sc_flags &= ~RAIDF_WLABEL;
1832		break;
1833
1834	case DIOCGDEFLABEL:
1835		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1836		break;
1837
1838#ifdef __HAVE_OLD_DISKLABEL
1839	case ODIOCGDEFLABEL:
1840		raidgetdefaultlabel(raidPtr, rs, &newlabel);
1841		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1842			return ENOTTY;
1843		memcpy(data, &newlabel, sizeof (struct olddisklabel));
1844		break;
1845#endif
1846
1847	case DIOCAWEDGE:
1848	case DIOCDWEDGE:
1849	    	dkw = (void *)data;
1850
1851		/* If the ioctl happens here, the parent is us. */
1852		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
1853		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1854
1855	case DIOCLWEDGES:
1856		return dkwedge_list(&rs->sc_dkdev,
1857		    (struct dkwedge_list *)data, l);
1858	case DIOCCACHESYNC:
1859		return rf_sync_component_caches(raidPtr);
1860
1861	case DIOCGSTRATEGY:
1862	    {
1863		struct disk_strategy *dks = (void *)data;
1864
1865		s = splbio();
1866		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1867		    sizeof(dks->dks_name));
1868		splx(s);
1869		dks->dks_paramlen = 0;
1870
1871		return 0;
1872	    }
1873
1874	case DIOCSSTRATEGY:
1875	    {
1876		struct disk_strategy *dks = (void *)data;
1877		struct bufq_state *new;
1878		struct bufq_state *old;
1879
1880		if (dks->dks_param != NULL) {
1881			return EINVAL;
1882		}
1883		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1884		error = bufq_alloc(&new, dks->dks_name,
1885		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1886		if (error) {
1887			return error;
1888		}
1889		s = splbio();
1890		old = rs->buf_queue;
1891		bufq_move(new, old);
1892		rs->buf_queue = new;
1893		splx(s);
1894		bufq_free(old);
1895
1896		return 0;
1897	    }
1898
1899	default:
1900		retcode = ENOTTY;
1901	}
1902	return (retcode);
1903
1904}
1905
1906
1907/* raidinit -- complete the rest of the initialization for the
1908   RAIDframe device.  */
1909
1910
1911static void
1912raidinit(RF_Raid_t *raidPtr)
1913{
1914	cfdata_t cf;
1915	struct raid_softc *rs;
1916	int     unit;
1917
1918	unit = raidPtr->raidid;
1919
1920	rs = &raid_softc[unit];
1921
1922	/* XXX should check return code first... */
1923	rs->sc_flags |= RAIDF_INITED;
1924
1925	/* XXX doesn't check bounds. */
1926	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1927
1928	/* attach the pseudo device */
1929	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1930	cf->cf_name = raid_cd.cd_name;
1931	cf->cf_atname = raid_cd.cd_name;
1932	cf->cf_unit = unit;
1933	cf->cf_fstate = FSTATE_STAR;
1934
1935	rs->sc_dev = config_attach_pseudo(cf);
1936
1937	if (rs->sc_dev == NULL) {
1938		printf("raid%d: config_attach_pseudo failed\n",
1939		    raidPtr->raidid);
1940		rs->sc_flags &= ~RAIDF_INITED;
1941		free(cf, M_RAIDFRAME);
1942		return;
1943	}
1944
1945	/* disk_attach actually creates space for the CPU disklabel, among
1946	 * other things, so it's critical to call this *BEFORE* we try putzing
1947	 * with disklabels. */
1948
1949	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1950	disk_attach(&rs->sc_dkdev);
1951	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1952
1953	/* XXX There may be a weird interaction here between this, and
1954	 * protectedSectors, as used in RAIDframe.  */
1955
1956	rs->sc_size = raidPtr->totalSectors;
1957
1958	dkwedge_discover(&rs->sc_dkdev);
1959
1960	rf_set_properties(rs, raidPtr);
1961
1962}
1963#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/*
	 * Hand the request to the spare-table daemon: push it onto the
	 * wait queue and broadcast so a daemon blocked in
	 * RAIDFRAME_SPARET_WAIT wakes up and sees it.
	 */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* Sleep (releasing the mutex inside rf_wait_cond2) until the
	 * daemon posts a response on the response queue. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* Pop the response entry; this is a *different* RF_SparetWait_t
	 * than the one we queued above (the daemon allocates it). */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* fcol carries the status of the spare-table installation */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1996#endif
1997
1998/* a wrapper around rf_DoAccess that extracts appropriate info from the
1999 * bp & passes it down.
2000 * any calls originating in the kernel must use non-blocking I/O
2001 * do some extra sanity checking to return "appropriate" error values for
2002 * certain conditions (to make some standard utilities work)
2003 *
2004 * Formerly known as: rf_DoAccessKernel
2005 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* component label updates are done without holding the
		 * RAID mutex, so drop it around the call */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Drain the buffer queue while we have openings; the mutex is
	 * held at the top of each iteration and released around the
	 * actual I/O submission. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NB: returns with raidPtr->mutex NOT held */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to this array's sector size */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* the "1 ||" makes this unconditional; db1_printf is
		 * compiled out unless debugging is enabled anyway */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the "< "
		 * comparisons catch unsigned wraparound in the sum */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* requests must be a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this outstanding I/O */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* NOTE(review): on this failure path 'openings'
			 * has already been decremented and disk_busy()
			 * called, and neither is visibly undone here --
			 * presumably the I/O completion path handles
			 * that; verify against KernelWakeupFunc/raiddone. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	/* out of openings; leave remaining requests queued */
	rf_unlock_mutex2(raidPtr->mutex);
}
2126
2127
2128
2129
2130/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
2131
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Dispatch one queued request to the underlying component device.
	 * Called with the disk queue locked (see comment above); the lock
	 * is dropped around the strategy call, which may block. */
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* remember which queue this request belongs to so the completion
	 * handler (KernelWakeupFunc) can find it via bp->b_private */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* fake up a completed buf so KernelWakeupFunc runs the
		 * normal completion path without any real I/O */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* start timing the physical I/O for access tracing */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the buf for the component transfer; completion
		 * goes through KernelWakeupFunc with req as b_private */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	/* always reports success; errors surface via the completion path */
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	/* Completion handler (b_iodone) for component I/O started by
	 * rf_DispatchKernelIO.  Records the error, possibly marks the
	 * component failed, and hands the request to the raidio thread
	 * via the raidPtr->iodone queue. */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by rf_DispatchKernelIO */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* stop the physical-I/O timer and fold the elapsed time into
	 * the access-trace statistics */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2273
2274
2275/*
2276 * initialize a buf structure for doing an I/O in the kernel.
2277 */
2278static void
2279InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2280       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2281       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2282       struct proc *b_proc)
2283{
2284	/* bp->b_flags       = B_PHYS | rw_flag; */
2285	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
2286	bp->b_oflags = 0;
2287	bp->b_cflags = 0;
2288	bp->b_bcount = numSect << logBytesPerSector;
2289	bp->b_bufsize = bp->b_bcount;
2290	bp->b_error = 0;
2291	bp->b_dev = dev;
2292	bp->b_data = bf;
2293	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2294	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
2295	if (bp->b_bcount == 0) {
2296		panic("bp->b_bcount is zero in InitBP!!");
2297	}
2298	bp->b_proc = b_proc;
2299	bp->b_iodone = cbFunc;
2300	bp->b_private = cbArg;
2301}
2302
2303static void
2304raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2305		    struct disklabel *lp)
2306{
2307	memset(lp, 0, sizeof(*lp));
2308
2309	/* fabricate a label... */
2310	lp->d_secperunit = raidPtr->totalSectors;
2311	lp->d_secsize = raidPtr->bytesPerSector;
2312	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2313	lp->d_ntracks = 4 * raidPtr->numCol;
2314	lp->d_ncylinders = raidPtr->totalSectors /
2315		(lp->d_nsectors * lp->d_ntracks);
2316	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2317
2318	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2319	lp->d_type = DTYPE_RAID;
2320	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2321	lp->d_rpm = 3600;
2322	lp->d_interleave = 1;
2323	lp->d_flags = 0;
2324
2325	lp->d_partitions[RAW_PART].p_offset = 0;
2326	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2327	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2328	lp->d_npartitions = RAW_PART + 1;
2329
2330	lp->d_magic = DISKMAGIC;
2331	lp->d_magic2 = DISKMAGIC;
2332	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2333
2334}
2335/*
2336 * Read the disklabel from the raid device.  If one is not present, fake one
2337 * up.
2338 */
static void
raidgetdisklabel(dev_t dev)
{
	/* Read the disklabel for the raid unit carrying 'dev' into the
	 * softc's dk_label, falling back to a fabricated label when none
	 * is found, and sanity-check what was read. */
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default so readdisklabel has sane
	 * geometry to work with */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* warn about any partition extending past the raid size */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2391/*
2392 * Take care of things one might want to take care of in the event
2393 * that a disklabel isn't present.
2394 */
2395static void
2396raidmakedisklabel(struct raid_softc *rs)
2397{
2398	struct disklabel *lp = rs->sc_dkdev.dk_label;
2399	db1_printf(("Making a label..\n"));
2400
2401	/*
2402	 * For historical reasons, if there's no disklabel present
2403	 * the raw partition must be marked FS_BSDFFS.
2404	 */
2405
2406	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2407
2408	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2409
2410	lp->d_checksum = dkcksum(lp);
2411}
2412/*
2413 * Wait interruptibly for an exclusive lock.
2414 *
2415 * XXX
2416 * Several drivers do this; it should be abstracted and made MP-safe.
2417 * (Hmm... where have we seen this warning before :->  GO )
2418 */
2419static int
2420raidlock(struct raid_softc *rs)
2421{
2422	int     error;
2423
2424	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2425		rs->sc_flags |= RAIDF_WANTED;
2426		if ((error =
2427			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2428			return (error);
2429	}
2430	rs->sc_flags |= RAIDF_LOCKED;
2431	return (0);
2432}
2433/*
2434 * Unlock and wake up any waiters.
2435 */
2436static void
2437raidunlock(struct raid_softc *rs)
2438{
2439
2440	rs->sc_flags &= ~RAIDF_LOCKED;
2441	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2442		rs->sc_flags &= ~RAIDF_WANTED;
2443		wakeup(rs);
2444	}
2445}
2446
2447
2448#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
2449#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
2450#define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
2451
2452static daddr_t
2453rf_component_info_offset(void)
2454{
2455
2456	return RF_COMPONENT_INFO_OFFSET;
2457}
2458
2459static daddr_t
2460rf_component_info_size(unsigned secsize)
2461{
2462	daddr_t info_size;
2463
2464	KASSERT(secsize);
2465	if (secsize > RF_COMPONENT_INFO_SIZE)
2466		info_size = secsize;
2467	else
2468		info_size = RF_COMPONENT_INFO_SIZE;
2469
2470	return info_size;
2471}
2472
2473static daddr_t
2474rf_parity_map_offset(RF_Raid_t *raidPtr)
2475{
2476	daddr_t map_offset;
2477
2478	KASSERT(raidPtr->bytesPerSector);
2479	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2480		map_offset = raidPtr->bytesPerSector;
2481	else
2482		map_offset = RF_COMPONENT_INFO_SIZE;
2483	map_offset += rf_component_info_offset();
2484
2485	return map_offset;
2486}
2487
2488static daddr_t
2489rf_parity_map_size(RF_Raid_t *raidPtr)
2490{
2491	daddr_t map_size;
2492
2493	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2494		map_size = raidPtr->bytesPerSector;
2495	else
2496		map_size = RF_PARITY_MAP_SIZE;
2497
2498	return map_size;
2499}
2500
2501int
2502raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2503{
2504	RF_ComponentLabel_t *clabel;
2505
2506	clabel = raidget_component_label(raidPtr, col);
2507	clabel->clean = RF_RAID_CLEAN;
2508	raidflush_component_label(raidPtr, col);
2509	return(0);
2510}
2511
2512
2513int
2514raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2515{
2516	RF_ComponentLabel_t *clabel;
2517
2518	clabel = raidget_component_label(raidPtr, col);
2519	clabel->clean = RF_RAID_DIRTY;
2520	raidflush_component_label(raidPtr, col);
2521	return(0);
2522}
2523
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Read the on-disk component label for column 'col' into the
	 * in-core copy (raid_cinfo[col].ci_label).  Returns the error
	 * from raidread_component_label(). */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2533
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Accessor for the in-core component label of column 'col'. */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2539
2540int
2541raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2542{
2543	RF_ComponentLabel_t *label;
2544
2545	label = &raidPtr->raid_cinfo[col].ci_label;
2546	label->mod_counter = raidPtr->mod_counter;
2547#ifndef RF_NO_PARITY_MAP
2548	label->parity_map_modcount = label->mod_counter;
2549#endif
2550	return raidwrite_component_label(raidPtr->bytesPerSector,
2551	    raidPtr->Disks[col].dev,
2552	    raidPtr->raid_cinfo[col].ci_vp, label);
2553}
2554
2555
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Read a component label from the fixed component-info area of
	 * the device into *clabel.  Thin wrapper that supplies the
	 * label's size/offset to raidread_component_area(). */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2565
2566/* ARGSUSED */
2567static int
2568raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2569    size_t msize, daddr_t offset, daddr_t dsize)
2570{
2571	struct buf *bp;
2572	const struct bdevsw *bdev;
2573	int error;
2574
2575	/* XXX should probably ensure that we don't try to do this if
2576	   someone has changed rf_protected_sectors. */
2577
2578	if (b_vp == NULL) {
2579		/* For whatever reason, this component is not valid.
2580		   Don't try to read a component label from it. */
2581		return(EINVAL);
2582	}
2583
2584	/* get a block of the appropriate size... */
2585	bp = geteblk((int)dsize);
2586	bp->b_dev = dev;
2587
2588	/* get our ducks in a row for the read */
2589	bp->b_blkno = offset / DEV_BSIZE;
2590	bp->b_bcount = dsize;
2591	bp->b_flags |= B_READ;
2592 	bp->b_resid = dsize;
2593
2594	bdev = bdevsw_lookup(bp->b_dev);
2595	if (bdev == NULL)
2596		return (ENXIO);
2597	(*bdev->d_strategy)(bp);
2598
2599	error = biowait(bp);
2600
2601	if (!error) {
2602		memcpy(data, bp->b_data, msize);
2603	}
2604
2605	brelse(bp, 0);
2606	return(error);
2607}
2608
2609
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Write *clabel to the fixed component-info area of the device,
	 * synchronously (asyncp == 0).  Thin wrapper around
	 * raidwrite_component_area(). */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2619
2620/* ARGSUSED */
2621static int
2622raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2623    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2624{
2625	struct buf *bp;
2626	const struct bdevsw *bdev;
2627	int error;
2628
2629	/* get a block of the appropriate size... */
2630	bp = geteblk((int)dsize);
2631	bp->b_dev = dev;
2632
2633	/* get our ducks in a row for the write */
2634	bp->b_blkno = offset / DEV_BSIZE;
2635	bp->b_bcount = dsize;
2636	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2637 	bp->b_resid = dsize;
2638
2639	memset(bp->b_data, 0, dsize);
2640	memcpy(bp->b_data, data, msize);
2641
2642	bdev = bdevsw_lookup(bp->b_dev);
2643	if (bdev == NULL)
2644		return (ENXIO);
2645	(*bdev->d_strategy)(bp);
2646	if (asyncp)
2647		return 0;
2648	error = biowait(bp);
2649	brelse(bp, 0);
2650	if (error) {
2651#if 1
2652		printf("Failed to write RAID component info!\n");
2653#endif
2654	}
2655
2656	return(error);
2657}
2658
2659void
2660rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2661{
2662	int c;
2663
2664	for (c = 0; c < raidPtr->numCol; c++) {
2665		/* Skip dead disks. */
2666		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2667			continue;
2668		/* XXXjld: what if an error occurs here? */
2669		raidwrite_component_area(raidPtr->Disks[c].dev,
2670		    raidPtr->raid_cinfo[c].ci_vp, map,
2671		    RF_PARITYMAP_NBYTE,
2672		    rf_parity_map_offset(raidPtr),
2673		    rf_parity_map_size(raidPtr), 0);
2674	}
2675}
2676
2677void
2678rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2679{
2680	struct rf_paritymap_ondisk tmp;
2681	int c,first;
2682
2683	first=1;
2684	for (c = 0; c < raidPtr->numCol; c++) {
2685		/* Skip dead disks. */
2686		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2687			continue;
2688		raidread_component_area(raidPtr->Disks[c].dev,
2689		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
2690		    RF_PARITYMAP_NBYTE,
2691		    rf_parity_map_offset(raidPtr),
2692		    rf_parity_map_size(raidPtr));
2693		if (first) {
2694			memcpy(map, &tmp, sizeof(*map));
2695			first = 0;
2696		} else {
2697			rf_paritymap_merge(map, &tmp);
2698		}
2699	}
2700}
2701
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/* Bump the set's mod counter and mark the component label of
	 * every usable component (and every in-use spare) dirty on disk.
	 * Spared-out components are deliberately left untouched. */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* spares live directly after the data columns in Disks[] */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column matches, scol keeps its
			   value from a previous iteration (or -1) -- confirm
			   a match always exists for rf_ds_used_spare. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2761
2762
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/* Bump the mod counter and rewrite the component labels of all
	 * optimal components and in-use spares.  When 'final' is
	 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark
	 * the labels clean (normal shutdown path). */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	/* spares live directly after the data columns in Disks[] */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column matches, scol keeps its
			   value from a previous iteration (or -1) -- confirm
			   a match always exists for rf_ds_used_spare. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2837
2838void
2839rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2840{
2841
2842	if (vp != NULL) {
2843		if (auto_configured == 1) {
2844			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2845			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2846			vput(vp);
2847
2848		} else {
2849			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2850		}
2851	}
2852}
2853
2854
2855void
2856rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2857{
2858	int r,c;
2859	struct vnode *vp;
2860	int acd;
2861
2862
2863	/* We take this opportunity to close the vnodes like we should.. */
2864
2865	for (c = 0; c < raidPtr->numCol; c++) {
2866		vp = raidPtr->raid_cinfo[c].ci_vp;
2867		acd = raidPtr->Disks[c].auto_configured;
2868		rf_close_component(raidPtr, vp, acd);
2869		raidPtr->raid_cinfo[c].ci_vp = NULL;
2870		raidPtr->Disks[c].auto_configured = 0;
2871	}
2872
2873	for (r = 0; r < raidPtr->numSpare; r++) {
2874		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2875		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2876		rf_close_component(raidPtr, vp, acd);
2877		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2878		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2879	}
2880}
2881
2882
void
rf_ReconThread(struct rf_recon_req *req)
{
	/* Kernel-thread entry point: fail the requested component and,
	 * if RF_FDFLAGS_RECON is set, reconstruct onto a spare.  Owns
	 * and frees 'req'.  Never returns (kthread_exit). */
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* request was handed to us by the creator; we free it */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2904
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	/* Kernel-thread entry point: rewrite all parity for the set and,
	 * on success, mark parity good so labels can be flagged clean at
	 * shutdown.  Never returns (kthread_exit). */
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2935
2936
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	/* Kernel-thread entry point: copy reconstructed data from spares
	 * back to replaced components.  Never returns (kthread_exit). */
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2951
2952
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	/* Kernel-thread entry point: reconstruct a component in place
	 * (back onto the same column).  Owns and frees 'req'.  Never
	 * returns (kthread_exit). */
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* request was handed to us by the creator; we free it */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2970
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	/* Try to read a component label from (dev, vp); if it looks
	 * reasonable, prepend a new RF_AutoConfig_t to ac_list (taking
	 * ownership of vp), otherwise close and release vp.  Returns the
	 * (possibly updated) list head, or NULL on allocation failure
	 * after freeing the whole list. */
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* out of memory: tear down everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry now owns the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: not keeping this component, so release the
		 * label memory and the vnode the caller handed us */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3028
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	/* Scan every disk-class device in the system for RAID components:
	 * wedges with a RAIDframe ptype, disklabel partitions of type
	 * FS_RAID, and (failing those) the raw partition.  Returns a
	 * linked list of RF_AutoConfig_t candidates (may be NULL). */
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their type in dkwedge_info rather
			   than in a disklabel partition */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3210
3211
3212int
3213rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3214{
3215
3216	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3217	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3218	    ((clabel->clean == RF_RAID_CLEAN) ||
3219	     (clabel->clean == RF_RAID_DIRTY)) &&
3220	    clabel->row >=0 &&
3221	    clabel->column >= 0 &&
3222	    clabel->num_rows > 0 &&
3223	    clabel->num_columns > 0 &&
3224	    clabel->row < clabel->num_rows &&
3225	    clabel->column < clabel->num_columns &&
3226	    clabel->blockSize > 0 &&
3227	    /*
3228	     * numBlocksHi may contain garbage, but it is ok since
3229	     * the type is unsigned.  If it is really garbage,
3230	     * rf_fix_old_label_size() will fix it.
3231	     */
3232	    rf_component_label_numblocks(clabel) > 0) {
3233		/*
3234		 * label looks reasonable enough...
3235		 * let's make sure it has no old garbage.
3236		 */
3237		if (numsecs)
3238			rf_fix_old_label_size(clabel, numsecs);
3239		return(1);
3240	}
3241	return(0);
3242}
3243
3244
3245/*
3246 * For reasons yet unknown, some old component labels have garbage in
3247 * the newer numBlocksHi region, and this causes lossage.  Since those
3248 * disks will also have numsecs set to less than 32 bits of sectors,
3249 * we can determine when this corruption has occured, and fix it.
3250 *
3251 * The exact same problem, with the same unknown reason, happens to
3252 * the partitionSizeHi member as well.
3253 */
3254static void
3255rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3256{
3257
3258	if (numsecs < ((uint64_t)1 << 32)) {
3259		if (clabel->numBlocksHi) {
3260			printf("WARNING: total sectors < 32 bits, yet "
3261			       "numBlocksHi set\n"
3262			       "WARNING: resetting numBlocksHi to zero.\n");
3263			clabel->numBlocksHi = 0;
3264		}
3265
3266		if (clabel->partitionSizeHi) {
3267			printf("WARNING: total sectors < 32 bits, yet "
3268			       "partitionSizeHi set\n"
3269			       "WARNING: resetting partitionSizeHi to zero.\n");
3270			clabel->partitionSizeHi = 0;
3271		}
3272	}
3273}
3274
3275
#ifdef DEBUG
/*
 * Debug aid: dump the interesting fields of a component label to the
 * console in human-readable form.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3306
3307RF_ConfigSet_t *
3308rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3309{
3310	RF_AutoConfig_t *ac;
3311	RF_ConfigSet_t *config_sets;
3312	RF_ConfigSet_t *cset;
3313	RF_AutoConfig_t *ac_next;
3314
3315
3316	config_sets = NULL;
3317
3318	/* Go through the AutoConfig list, and figure out which components
3319	   belong to what sets.  */
3320	ac = ac_list;
3321	while(ac!=NULL) {
3322		/* we're going to putz with ac->next, so save it here
3323		   for use at the end of the loop */
3324		ac_next = ac->next;
3325
3326		if (config_sets == NULL) {
3327			/* will need at least this one... */
3328			config_sets = (RF_ConfigSet_t *)
3329				malloc(sizeof(RF_ConfigSet_t),
3330				       M_RAIDFRAME, M_NOWAIT);
3331			if (config_sets == NULL) {
3332				panic("rf_create_auto_sets: No memory!");
3333			}
3334			/* this one is easy :) */
3335			config_sets->ac = ac;
3336			config_sets->next = NULL;
3337			config_sets->rootable = 0;
3338			ac->next = NULL;
3339		} else {
3340			/* which set does this component fit into? */
3341			cset = config_sets;
3342			while(cset!=NULL) {
3343				if (rf_does_it_fit(cset, ac)) {
3344					/* looks like it matches... */
3345					ac->next = cset->ac;
3346					cset->ac = ac;
3347					break;
3348				}
3349				cset = cset->next;
3350			}
3351			if (cset==NULL) {
3352				/* didn't find a match above... new set..*/
3353				cset = (RF_ConfigSet_t *)
3354					malloc(sizeof(RF_ConfigSet_t),
3355					       M_RAIDFRAME, M_NOWAIT);
3356				if (cset == NULL) {
3357					panic("rf_create_auto_sets: No memory!");
3358				}
3359				cset->ac = ac;
3360				ac->next = NULL;
3361				cset->next = config_sets;
3362				cset->rootable = 0;
3363				config_sets = cset;
3364			}
3365		}
3366		ac = ac_next;
3367	}
3368
3369
3370	return(config_sets);
3371}
3372
/*
 * Decide whether component 'ac' belongs to configuration set 'cset' by
 * comparing its component label with the label of the set's first
 * member.  Returns 1 if it fits, 0 otherwise.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and/or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
	    rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}
3430
/*
 * Decide whether the config set has enough live components to be
 * configured.  A component counts as live if its column is present and
 * its mod_counter equals the highest mod_counter seen in the set.
 * Returns 1 if the set can be configured, 0 if too many components are
 * missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* The set's mod_counter is the highest one among its members;
	   components with a lower count are stale (e.g. failed disks). */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a live component occupying column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did the odd component of a
				   mirror pair, and we didn't bail..
				   reset the even_pair_failed flag,
				   and go on to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3533
/*
 * Build an RF_Config_t from the component labels of an auto-detected
 * set, suitable for passing to rf_Configure().  The geometry and queue
 * parameters come from the first component's label; the device names
 * come from each component in the list.
 *
 * NOTE(review): the raidPtr parameter is not referenced in this
 * function's body.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Record each component's device name in its column slot. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables for auto-configured sets. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3568
3569int
3570rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3571{
3572	RF_ComponentLabel_t *clabel;
3573	int column;
3574	int sparecol;
3575
3576	raidPtr->autoconfigure = new_value;
3577
3578	for(column=0; column<raidPtr->numCol; column++) {
3579		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3580			clabel = raidget_component_label(raidPtr, column);
3581			clabel->autoconfigure = new_value;
3582			raidflush_component_label(raidPtr, column);
3583		}
3584	}
3585	for(column = 0; column < raidPtr->numSpare ; column++) {
3586		sparecol = raidPtr->numCol + column;
3587		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3588			clabel = raidget_component_label(raidPtr, sparecol);
3589			clabel->autoconfigure = new_value;
3590			raidflush_component_label(raidPtr, sparecol);
3591		}
3592	}
3593	return(new_value);
3594}
3595
3596int
3597rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3598{
3599	RF_ComponentLabel_t *clabel;
3600	int column;
3601	int sparecol;
3602
3603	raidPtr->root_partition = new_value;
3604	for(column=0; column<raidPtr->numCol; column++) {
3605		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3606			clabel = raidget_component_label(raidPtr, column);
3607			clabel->root_partition = new_value;
3608			raidflush_component_label(raidPtr, column);
3609		}
3610	}
3611	for(column = 0; column < raidPtr->numSpare ; column++) {
3612		sparecol = raidPtr->numCol + column;
3613		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3614			clabel = raidget_component_label(raidPtr, sparecol);
3615			clabel->root_partition = new_value;
3616			raidflush_component_label(raidPtr, sparecol);
3617		}
3618	}
3619	return(new_value);
3620}
3621
3622void
3623rf_release_all_vps(RF_ConfigSet_t *cset)
3624{
3625	RF_AutoConfig_t *ac;
3626
3627	ac = cset->ac;
3628	while(ac!=NULL) {
3629		/* Close the vp, and give it back */
3630		if (ac->vp) {
3631			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3632			VOP_CLOSE(ac->vp, FREAD, NOCRED);
3633			vput(ac->vp);
3634			ac->vp = NULL;
3635		}
3636		ac = ac->next;
3637	}
3638}
3639
3640
3641void
3642rf_cleanup_config_set(RF_ConfigSet_t *cset)
3643{
3644	RF_AutoConfig_t *ac;
3645	RF_AutoConfig_t *next_ac;
3646
3647	ac = cset->ac;
3648	while(ac!=NULL) {
3649		next_ac = ac->next;
3650		/* nuke the label */
3651		free(ac->clabel, M_RAIDFRAME);
3652		/* cleanup the config structure */
3653		free(ac, M_RAIDFRAME);
3654		/* "next.." */
3655		ac = next_ac;
3656	}
3657	/* and, finally, nuke the config set */
3658	free(cset, M_RAIDFRAME);
3659}
3660
3661
/*
 * Initialize a component label from the current state of the RAID set:
 * version, serial/mod counters, geometry, size, and the autoconfigure
 * and root-partition settings.  The label is marked dirty and optimal;
 * callers flush it to disk separately.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3694
/*
 * Auto-configure one config set: pick a free RAID unit (preferring the
 * unit recorded in the component labels), build a configuration from
 * the labels, and configure the set.  On success *unit holds the raid
 * unit number; returns 0 on success, non-zero on failure.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	/* raidID goes to -1 if the search above exhausted all units */
	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3795
3796void
3797rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3798{
3799	struct buf *bp;
3800
3801	bp = (struct buf *)desc->bp;
3802	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3803	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3804}
3805
/*
 * Initialize a pool of 'size'-byte items with wait channel 'w_chan',
 * pre-allocate 'xmin' items, and set the low/high watermarks to
 * xmin/xmax.  The pool is primed before the low watermark is set so
 * the items are available immediately.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3815
3816/*
3817 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3818 * if there is IO pending and if that IO could possibly be done for a
3819 * given RAID set.  Returns 0 if IO is waiting and can be done, 1
3820 * otherwise.
3821 *
3822 */
3823
3824int
3825rf_buf_queue_check(int raidid)
3826{
3827	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3828	    raidPtrs[raidid]->openings > 0) {
3829		/* there is work to do */
3830		return 0;
3831	}
3832	/* default is nothing to do */
3833	return 1;
3834}
3835
3836int
3837rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3838{
3839	uint64_t numsecs;
3840	unsigned secsize;
3841	int error;
3842
3843	error = getdisksize(vp, &numsecs, &secsize);
3844	if (error == 0) {
3845		diskPtr->blockSize = secsize;
3846		diskPtr->numBlocks = numsecs - rf_protectedSectors;
3847		diskPtr->partitionSize = numsecs;
3848		return 0;
3849	}
3850	return error;
3851}
3852
/*
 * Autoconf match function: a raid pseudo-device always matches.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3858
/*
 * Autoconf attach function: intentionally empty -- no per-device work
 * is done at attach time for the raid pseudo-device.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3864
3865
3866static int
3867raid_detach(device_t self, int flags)
3868{
3869	int error;
3870	struct raid_softc *rs = &raid_softc[device_unit(self)];
3871
3872	if ((error = raidlock(rs)) != 0)
3873		return (error);
3874
3875	error = raid_detach_unlocked(rs);
3876
3877	raidunlock(rs);
3878
3879	return error;
3880}
3881
/*
 * Publish a synthesized disk geometry for the raid unit via the
 * device-properties dictionary, replacing any previous disk-info
 * dictionary on the disk structure.
 *
 * NOTE(review): the geometry is fabricated -- sectors-per-track is set
 * to the data sectors per stripe and tracks-per-cylinder to 4 * numCol;
 * these are not physical disk parameters.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	   (4 * raidPtr->numCol)));

	/* geom is retained by disk_info; drop our reference */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new dictionary and release the old one, if any */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3911
3912/*
3913 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3914 * We end up returning whatever error was returned by the first cache flush
3915 * that fails.
3916 */
3917
3918int
3919rf_sync_component_caches(RF_Raid_t *raidPtr)
3920{
3921	int c, sparecol;
3922	int e,error;
3923	int force = 1;
3924
3925	error = 0;
3926	for (c = 0; c < raidPtr->numCol; c++) {
3927		if (raidPtr->Disks[c].status == rf_ds_optimal) {
3928			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3929					  &force, FWRITE, NOCRED);
3930			if (e) {
3931				if (e != ENODEV)
3932					printf("raid%d: cache flush to component %s failed.\n",
3933					       raidPtr->raidid, raidPtr->Disks[c].devname);
3934				if (error == 0) {
3935					error = e;
3936				}
3937			}
3938		}
3939	}
3940
3941	for( c = 0; c < raidPtr->numSpare ; c++) {
3942		sparecol = raidPtr->numCol + c;
3943		/* Need to ensure that the reconstruct actually completed! */
3944		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3945			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3946					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
3947			if (e) {
3948				if (e != ENODEV)
3949					printf("raid%d: cache flush to component %s failed.\n",
3950					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3951				if (error == 0) {
3952					error = e;
3953				}
3954			}
3955		}
3956	}
3957	return error;
3958}
3959