md_intel.c revision 235874
/*-
 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
 * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2713907Sdyson
2813907Sdyson#include <sys/cdefs.h>
2913907Sdyson__FBSDID("$FreeBSD: stable/9/sys/geom/raid/md_intel.c 235874 2012-05-24 02:34:03Z mav $");
3013907Sdyson
3113907Sdyson#include <sys/param.h>
3213907Sdyson#include <sys/bio.h>
3313907Sdyson#include <sys/endian.h>
3413907Sdyson#include <sys/kernel.h>
3513907Sdyson#include <sys/kobj.h>
3613907Sdyson#include <sys/limits.h>
3713913Sdyson#include <sys/lock.h>
3813907Sdyson#include <sys/malloc.h>
3913907Sdyson#include <sys/mutex.h>
4013907Sdyson#include <sys/systm.h>
4113907Sdyson#include <sys/taskqueue.h>
4213907Sdyson#include <geom/geom.h>
4313907Sdyson#include "geom/raid/g_raid.h"
4413907Sdyson#include "g_raid_md_if.h"
4513907Sdyson
46118764Ssilbystatic MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
47117325Ssilby
/*
 * One map of a volume, as stored in the metadata.
 *
 * 64-bit quantities are split into a low word and a matching "_hi" word.
 * The structure is variable length: disk_idx[] really carries total_disks
 * entries, and whatever follows the map starts right after the last one.
 */
struct intel_raid_map {
	uint32_t	offset;		/* Low 32 bits, see offset_hi. */
	uint32_t	disk_sectors;	/* Low 32 bits, see disk_sectors_hi. */
	uint32_t	stripe_count;	/* Low 32 bits, see stripe_count_hi. */
	uint16_t	strip_sectors;

	uint8_t		status;
#define INTEL_S_READY		0x00
#define INTEL_S_UNINITIALIZED	0x01
#define INTEL_S_DEGRADED	0x02
#define INTEL_S_FAILURE		0x03

	uint8_t		type;
#define INTEL_T_RAID0		0x00
#define INTEL_T_RAID1		0x01
#define INTEL_T_RAID5		0x05

	uint8_t		total_disks;	/* Number of disk_idx[] entries. */
	uint8_t		total_domains;
	uint8_t		failed_disk_num;
	uint8_t		ddf;
	uint32_t	offset_hi;	/* High 32 bits of offset. */
	uint32_t	disk_sectors_hi; /* High 32 bits of disk_sectors. */
	uint32_t	stripe_count_hi; /* High 32 bits of stripe_count. */
	uint32_t	filler_2[4];

	/* Per-disk words: INTEL_DI_IDX selects the index bits. */
	uint32_t	disk_idx[1];	/* total_disks entries. */
#define INTEL_DI_IDX	0x00ffffff
#define INTEL_DI_RBLD	0x01000000
} __packed;
76133790Ssilby
77133790Ssilbystruct intel_raid_vol {
78133790Ssilby	uint8_t		name[16];
79133049Ssilby	u_int64_t	total_sectors __packed;
80133049Ssilby	uint32_t	state;
81133049Ssilby#define INTEL_ST_BOOTABLE		0x00000001
82133049Ssilby#define INTEL_ST_BOOT_DEVICE		0x00000002
83133790Ssilby#define INTEL_ST_READ_COALESCING	0x00000004
84133790Ssilby#define INTEL_ST_WRITE_COALESCING	0x00000008
85133049Ssilby#define INTEL_ST_LAST_SHUTDOWN_DIRTY	0x00000010
86133049Ssilby#define INTEL_ST_HIDDEN_AT_BOOT		0x00000020
87133049Ssilby#define INTEL_ST_CURRENTLY_HIDDEN	0x00000040
88133049Ssilby#define INTEL_ST_VERIFY_AND_FIX		0x00000080
8913907Sdyson#define INTEL_ST_MAP_STATE_UNINIT	0x00000100
9013907Sdyson#define INTEL_ST_NO_AUTO_RECOVERY	0x00000200
91116182Sobrien#define INTEL_ST_CLONE_N_GO		0x00000400
92116182Sobrien#define INTEL_ST_CLONE_MAN_SYNC		0x00000800
93116182Sobrien#define INTEL_ST_CNG_MASTER_DISK_NUM	0x00001000
9413675Sdyson	uint32_t	reserved;
9513675Sdyson	uint8_t		migr_priority;
9624131Sbde	uint8_t		num_sub_vols;
9713675Sdyson	uint8_t		tid;
9813675Sdyson	uint8_t		cng_master_disk;
9924206Sbde	uint16_t	cache_policy;
10091372Salfred	uint8_t		cng_state;
10176166Smarkm	uint8_t		cng_sub_state;
10276827Salfred	uint32_t	filler_0[10];
10324206Sbde
10413675Sdyson	uint32_t	curr_migr_unit;
10591968Salfred	uint32_t	checkpoint_id;
10629356Speter	uint8_t		migr_state;
10770834Swollman	uint8_t		migr_type;
10813675Sdyson#define INTEL_MT_INIT		0
109184849Sed#define INTEL_MT_REBUILD	1
110117325Ssilby#define INTEL_MT_VERIFY		2
11113675Sdyson#define INTEL_MT_GEN_MIGR	3
11213675Sdyson#define INTEL_MT_STATE_CHANGE	4
11376166Smarkm#define INTEL_MT_REPAIR		5
11455112Sbde	uint8_t		dirty;
11534924Sbde	uint8_t		fs_state;
11659288Sjlemon	uint16_t	verify_errors;
11713675Sdyson	uint16_t	bad_blocks;
118163606Srwatson	uint32_t	curr_migr_unit_hi;
119163606Srwatson	uint32_t	filler_1[3];
12013675Sdyson	struct intel_raid_map map[1];	/* 2 entries if migr_state != 0. */
12113675Sdyson} __packed;
12213675Sdyson
/*
 * Disk record as stored in the metadata.  The disk size is kept as a
 * 64-bit value split across sectors (low word) and sectors_hi (high word).
 */
struct intel_raid_disk {
#define INTEL_SERIAL_LEN	16
	uint8_t		serial[INTEL_SERIAL_LEN];
	uint32_t	sectors;	/* Low 32 bits, see sectors_hi. */
	uint32_t	id;

	uint32_t	flags;
#define INTEL_F_SPARE		0x01
#define INTEL_F_ASSIGNED	0x02
#define INTEL_F_FAILED		0x04
#define INTEL_F_ONLINE		0x08

	uint32_t	owner_cfg_num;
	uint32_t	sectors_hi;	/* High 32 bits of sectors. */
	uint32_t	filler[3];
} __packed;
13714037Sdyson
13814037Sdysonstruct intel_raid_conf {
13914037Sdyson	uint8_t		intel_id[24];
140108255Sphk#define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
141108255Sphk
142175140Sjhb	uint8_t		version[6];
143108255Sphk#define INTEL_VERSION_1000	"1.0.00"	/* RAID0 */
144108255Sphk#define INTEL_VERSION_1100	"1.1.00"	/* RAID1 */
145108255Sphk#define INTEL_VERSION_1200	"1.2.00"	/* Many volumes */
146108255Sphk#define INTEL_VERSION_1201	"1.2.01"	/* 3 or 4 disks */
147108255Sphk#define INTEL_VERSION_1202	"1.2.02"	/* RAID5 */
14813675Sdyson#define INTEL_VERSION_1204	"1.2.04"	/* 5 or 6 disks */
14972521Sjlemon#define INTEL_VERSION_1206	"1.2.06"	/* CNG */
150116546Sphk#define INTEL_VERSION_1300	"1.3.00"	/* Attributes */
151116546Sphk
152175140Sjhb	uint8_t		dummy_0[2];
153116546Sphk	uint32_t	checksum;
154116546Sphk	uint32_t	config_size;
155116546Sphk	uint32_t	config_id;
156116546Sphk	uint32_t	generation;
157116546Sphk	uint32_t	error_log_size;
158116546Sphk	uint32_t	attributes;
15972521Sjlemon#define INTEL_ATTR_RAID0	0x00000001
16013675Sdyson#define INTEL_ATTR_RAID1	0x00000002
16159288Sjlemon#define INTEL_ATTR_RAID10	0x00000004
16259288Sjlemon#define INTEL_ATTR_RAID1E	0x00000008
16359288Sjlemon#define INTEL_ATTR_RAID5	0x00000010
16459288Sjlemon#define INTEL_ATTR_RAIDCNG	0x00000020
165197134Srwatson#define INTEL_ATTR_2TB		0x20000000
166197134Srwatson#define INTEL_ATTR_PM		0x40000000
167197134Srwatson#define INTEL_ATTR_CHECKSUM	0x80000000
168197134Srwatson
169197134Srwatson	uint8_t		total_disks;
170197134Srwatson	uint8_t		total_volumes;
171197134Srwatson	uint8_t		dummy_2[2];
172197134Srwatson	uint32_t	filler_0[39];
173197134Srwatson	struct intel_raid_disk	disk[1];	/* total_disks entries. */
174197134Srwatson	/* Here goes total_volumes of struct intel_raid_vol. */
17559288Sjlemon} __packed;
17613675Sdyson
/*
 * Largest metadata size, in bytes, for an array of `ndisks' disks:
 * the header (which already embeds one disk record), the remaining disk
 * records, two volumes (each already embedding one map with one disk_idx
 * entry), two extra maps for volumes under migration, and the remaining
 * disk_idx entries of all four maps.
 *
 * The argument is parenthesized so that any expression may be passed.
 */
#define INTEL_MAX_MD_SIZE(ndisks)				\
    (sizeof(struct intel_raid_conf) +				\
     sizeof(struct intel_raid_disk) * ((ndisks) - 1) +		\
     sizeof(struct intel_raid_vol) * 2 +			\
     sizeof(struct intel_raid_map) * 2 +			\
     sizeof(uint32_t) * ((ndisks) - 1) * 4)
18313907Sdyson
18413675Sdysonstruct g_raid_md_intel_perdisk {
185189649Sjhb	struct intel_raid_conf	*pd_meta;
186133790Ssilby	int			 pd_disk_pos;
187133790Ssilby	struct intel_raid_disk	 pd_disk_meta;
188133790Ssilby};
189133790Ssilby
19013907Sdysonstruct g_raid_md_intel_object {
191189649Sjhb	struct g_raid_md_object	 mdio_base;
192117325Ssilby	uint32_t		 mdio_config_id;
193189649Sjhb	uint32_t		 mdio_generation;
194117325Ssilby	struct intel_raid_conf	*mdio_meta;
195133790Ssilby	struct callout		 mdio_start_co;	/* STARTING state timer. */
196133790Ssilby	int			 mdio_disks_present;
197133790Ssilby	int			 mdio_started;
198133790Ssilby	int			 mdio_incomplete;
199133790Ssilby	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
200133790Ssilby};
201133790Ssilby
202133790Ssilbystatic g_raid_md_create_t g_raid_md_create_intel;
203117325Ssilbystatic g_raid_md_taste_t g_raid_md_taste_intel;
20491413Salfredstatic g_raid_md_event_t g_raid_md_event_intel;
20591413Salfredstatic g_raid_md_ctl_t g_raid_md_ctl_intel;
20691413Salfredstatic g_raid_md_write_t g_raid_md_write_intel;
207133790Ssilbystatic g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
20891413Salfredstatic g_raid_md_free_disk_t g_raid_md_free_disk_intel;
20991413Salfredstatic g_raid_md_free_t g_raid_md_free_intel;
21091413Salfred
21114037Sdysonstatic kobj_method_t g_raid_md_intel_methods[] = {
21291413Salfred	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_intel),
21391413Salfred	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_intel),
21491413Salfred	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_intel),
21591413Salfred	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_intel),
21614037Sdyson	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_intel),
21791413Salfred	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_intel),
218132579Srwatson	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_intel),
21913675Sdyson	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_intel),
220132987Sgreen	{ 0, 0 }
221132987Sgreen};
222125293Srwatson
223125293Srwatsonstatic struct g_raid_md_class g_raid_md_intel_class = {
22492751Sjeff	"Intel",
22527899Sdyson	g_raid_md_intel_methods,
22691372Salfred	sizeof(struct g_raid_md_intel_object),
22791372Salfred	.mdc_priority = 100
22891372Salfred};
22991372Salfred
23091372Salfred
231118880Salcstatic struct intel_raid_map *
232170022Srwatsonintel_get_map(struct intel_raid_vol *mvol, int i)
233170022Srwatson{
234125293Srwatson	struct intel_raid_map *mmap;
235118880Salc
23691372Salfred	if (i > (mvol->migr_state ? 1 : 0))
23791372Salfred		return (NULL);
238132987Sgreen	mmap = &mvol->map[0];
239132987Sgreen	for (; i > 0; i--) {
240125293Srwatson		mmap = (struct intel_raid_map *)
241125293Srwatson		    &mmap->disk_idx[mmap->total_disks];
242125293Srwatson	}
243125293Srwatson	return ((struct intel_raid_map *)mmap);
244125293Srwatson}
245125293Srwatson
246125293Srwatsonstatic struct intel_raid_vol *
247125293Srwatsonintel_get_volume(struct intel_raid_conf *meta, int i)
248125293Srwatson{
249125293Srwatson	struct intel_raid_vol *mvol;
250125293Srwatson	struct intel_raid_map *mmap;
251125293Srwatson
252125293Srwatson	if (i > 1)
253125293Srwatson		return (NULL);
254125293Srwatson	mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
255125293Srwatson	for (; i > 0; i--) {
256125293Srwatson		mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
257125293Srwatson		mvol = (struct intel_raid_vol *)
258125293Srwatson		    &mmap->disk_idx[mmap->total_disks];
259125293Srwatson	}
260125293Srwatson	return (mvol);
261125293Srwatson}
262125293Srwatson
263125293Srwatsonstatic off_t
264125293Srwatsonintel_get_map_offset(struct intel_raid_map *mmap)
265125293Srwatson{
266125293Srwatson	off_t offset = (off_t)mmap->offset_hi << 32;
267125293Srwatson
268125293Srwatson	offset += mmap->offset;
269125293Srwatson	return (offset);
270125293Srwatson}
271125293Srwatson
272125293Srwatsonstatic void
273179243Skibintel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
274179243Skib{
275125293Srwatson
276125293Srwatson	mmap->offset = offset & 0xffffffff;
277125293Srwatson	mmap->offset_hi = offset >> 32;
278125293Srwatson}
279125293Srwatson
280125293Srwatsonstatic off_t
281125293Srwatsonintel_get_map_disk_sectors(struct intel_raid_map *mmap)
282125293Srwatson{
283132987Sgreen	off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
284125293Srwatson
285125293Srwatson	disk_sectors += mmap->disk_sectors;
286132987Sgreen	return (disk_sectors);
287132987Sgreen}
288125293Srwatson
289125293Srwatsonstatic void
290125293Srwatsonintel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
291125293Srwatson{
292125293Srwatson
293125293Srwatson	mmap->disk_sectors = disk_sectors & 0xffffffff;
294125293Srwatson	mmap->disk_sectors_hi = disk_sectors >> 32;
295125293Srwatson}
296132987Sgreen
297125293Srwatsonstatic void
298125293Srwatsonintel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
299125293Srwatson{
300125293Srwatson
301125293Srwatson	mmap->stripe_count = stripe_count & 0xffffffff;
302125293Srwatson	mmap->stripe_count_hi = stripe_count >> 32;
303125293Srwatson}
304125293Srwatson
305125293Srwatsonstatic off_t
306125293Srwatsonintel_get_disk_sectors(struct intel_raid_disk *disk)
307125293Srwatson{
308125293Srwatson	off_t sectors = (off_t)disk->sectors_hi << 32;
309125293Srwatson
310125293Srwatson	sectors += disk->sectors;
31113675Sdyson	return (sectors);
312167232Srwatson}
313167232Srwatson
31413675Sdysonstatic void
31513675Sdysonintel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
316184849Sed{
31713675Sdyson
31883366Sjulian	disk->sectors = sectors & 0xffffffff;
31913675Sdyson	disk->sectors_hi = sectors >> 32;
320125293Srwatson}
32113675Sdyson
32213675Sdysonstatic off_t
32327899Sdysonintel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
324125293Srwatson{
325125293Srwatson	off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
326125293Srwatson
327126249Srwatson	curr_migr_unit += vol->curr_migr_unit;
328172930Srwatson	return (curr_migr_unit);
329126249Srwatson}
330125293Srwatson
331172930Srwatsonstatic void
332172930Srwatsonintel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
333125293Srwatson{
334125293Srwatson
335125293Srwatson	vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
336125293Srwatson	vol->curr_migr_unit_hi = curr_migr_unit >> 32;
337193951Skib}
338193951Skib
339140369Ssilbystatic void
340133790Ssilbyg_raid_md_intel_print(struct intel_raid_conf *meta)
341155035Sglebius{
342155035Sglebius	struct intel_raid_vol *mvol;
343124394Sdes	struct intel_raid_map *mmap;
344124394Sdes	int i, j, k;
345155035Sglebius
34676364Salfred	if (g_raid_debug < 1)
347124394Sdes		return;
34813907Sdyson
34913907Sdyson	printf("********* ATA Intel MatrixRAID Metadata *********\n");
35013675Sdyson	printf("intel_id            <%.24s>\n", meta->intel_id);
35183366Sjulian	printf("version             <%.6s>\n", meta->version);
35270915Sdwmalone	printf("checksum            0x%08x\n", meta->checksum);
35370915Sdwmalone	printf("config_size         0x%08x\n", meta->config_size);
35470915Sdwmalone	printf("config_id           0x%08x\n", meta->config_id);
35570915Sdwmalone	printf("generation          0x%08x\n", meta->generation);
35670915Sdwmalone	printf("attributes          0x%08x\n", meta->attributes);
357121256Sdwmalone	printf("total_disks         %u\n", meta->total_disks);
358184849Sed	printf("total_volumes       %u\n", meta->total_volumes);
35970915Sdwmalone	printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags\n");
36070803Sdwmalone	for (i = 0; i < meta->total_disks; i++ ) {
36170803Sdwmalone		printf("    %d   <%.16s> %u %u 0x%08x 0x%08x\n", i,
36270803Sdwmalone		    meta->disk[i].serial, meta->disk[i].sectors,
36370803Sdwmalone		    meta->disk[i].sectors_hi,
36470803Sdwmalone		    meta->disk[i].id, meta->disk[i].flags);
36570803Sdwmalone	}
366174988Sjeff	for (i = 0; i < meta->total_volumes; i++) {
36783366Sjulian		mvol = intel_get_volume(meta, i);
36870915Sdwmalone		printf(" ****** Volume %d ******\n", i);
369184849Sed		printf(" name               %.16s\n", mvol->name);
37083366Sjulian		printf(" total_sectors      %ju\n", mvol->total_sectors);
37170915Sdwmalone		printf(" state              %u\n", mvol->state);
37270915Sdwmalone		printf(" reserved           %u\n", mvol->reserved);
37370915Sdwmalone		printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
37470915Sdwmalone		printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
375121256Sdwmalone		printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
376174988Sjeff		printf(" migr_state         %u\n", mvol->migr_state);
377121256Sdwmalone		printf(" migr_type          %u\n", mvol->migr_type);
378184849Sed		printf(" dirty              %u\n", mvol->dirty);
37983366Sjulian
38013675Sdyson		for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
38113675Sdyson			printf("  *** Map %d ***\n", j);
38213675Sdyson			mmap = intel_get_map(mvol, j);
38313675Sdyson			printf("  offset            %u\n", mmap->offset);
384184849Sed			printf("  offset_hi         %u\n", mmap->offset_hi);
385184849Sed			printf("  disk_sectors      %u\n", mmap->disk_sectors);
386184849Sed			printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
387184849Sed			printf("  stripe_count      %u\n", mmap->stripe_count);
388184849Sed			printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
389184849Sed			printf("  strip_sectors     %u\n", mmap->strip_sectors);
390184849Sed			printf("  status            %u\n", mmap->status);
391184849Sed			printf("  type              %u\n", mmap->type);
392184849Sed			printf("  total_disks       %u\n", mmap->total_disks);
393184849Sed			printf("  total_domains     %u\n", mmap->total_domains);
394184849Sed			printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
395184849Sed			printf("  ddf               %u\n", mmap->ddf);
396184849Sed			printf("  disk_idx         ");
397184849Sed			for (k = 0; k < mmap->total_disks; k++)
398184849Sed				printf(" 0x%08x", mmap->disk_idx[k]);
399184849Sed			printf("\n");
400184849Sed		}
40113909Sdyson	}
40213909Sdyson	printf("=================================================\n");
40376364Salfred}
40476364Salfred
40576364Salfredstatic struct intel_raid_conf *
40613909Sdysonintel_meta_copy(struct intel_raid_conf *meta)
40776364Salfred{
408132579Srwatson	struct intel_raid_conf *nmeta;
40913675Sdyson
41076364Salfred	nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
41113675Sdyson	memcpy(nmeta, meta, meta->config_size);
41276364Salfred	return (nmeta);
413133790Ssilby}
414117325Ssilby
415117325Ssilbystatic int
41613675Sdysonintel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
417125293Srwatson{
418133790Ssilby	int pos;
419133790Ssilby
420133790Ssilby	for (pos = 0; pos < meta->total_disks; pos++) {
421133790Ssilby		if (strncmp(meta->disk[pos].serial,
422133790Ssilby		    serial, INTEL_SERIAL_LEN) == 0)
423133790Ssilby			return (pos);
42479224Sdillon	}
425118764Ssilby	return (-1);
426118764Ssilby}
42713675Sdyson
428122163Salcstatic struct intel_raid_conf *
42976364Salfredintel_meta_read(struct g_consumer *cp)
43013688Sdyson{
43176364Salfred	struct g_provider *pp;
432133790Ssilby	struct intel_raid_conf *meta;
433133790Ssilby	struct intel_raid_vol *mvol;
434133790Ssilby	struct intel_raid_map *mmap;
435133790Ssilby	char *buf;
436133790Ssilby	int error, i, j, k, left, size;
437133790Ssilby	uint32_t checksum, *ptr;
438133790Ssilby
439133790Ssilby	pp = cp->provider;
440133790Ssilby
441133790Ssilby	/* Read the anchor sector. */
442133790Ssilby	buf = g_read_data(cp,
443133790Ssilby	    pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
444133790Ssilby	if (buf == NULL) {
44576364Salfred		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
44676364Salfred		    pp->name, error);
44776364Salfred		return (NULL);
448133790Ssilby	}
449133790Ssilby	meta = (struct intel_raid_conf *)buf;
450133790Ssilby
451133790Ssilby	/* Check if this is an Intel RAID struct */
452133790Ssilby	if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
453133790Ssilby		G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
454133790Ssilby		g_free(buf);
455133790Ssilby		return (NULL);
456133790Ssilby	}
457133790Ssilby	if (meta->config_size > 65536 ||
458133790Ssilby	    meta->config_size < sizeof(struct intel_raid_conf)) {
459133790Ssilby		G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
460133790Ssilby		    meta->config_size);
461133790Ssilby		g_free(buf);
46276364Salfred		return (NULL);
46376364Salfred	}
46476364Salfred	size = meta->config_size;
465133790Ssilby	meta = malloc(size, M_MD_INTEL, M_WAITOK);
46676364Salfred	memcpy(meta, buf, min(size, pp->sectorsize));
467133790Ssilby	g_free(buf);
468189649Sjhb
46976364Salfred	/* Read all the rest, if needed. */
47013907Sdyson	if (meta->config_size > pp->sectorsize) {
47113688Sdyson		left = (meta->config_size - 1) / pp->sectorsize;
47213907Sdyson		buf = g_read_data(cp,
473132579Srwatson		    pp->mediasize - pp->sectorsize * (2 + left),
474132579Srwatson		    pp->sectorsize * left, &error);
475132579Srwatson		if (buf == NULL) {
476132579Srwatson			G_RAID_DEBUG(1, "Cannot read remaining metadata"
477132579Srwatson			    " part from %s (error=%d).",
478132579Srwatson			    pp->name, error);
479132579Srwatson			free(meta, M_MD_INTEL);
480132579Srwatson			return (NULL);
481133049Ssilby		}
482133049Ssilby		memcpy(((char *)meta) + pp->sectorsize, buf,
483132579Srwatson		    pp->sectorsize * left);
484132579Srwatson		g_free(buf);
485132579Srwatson	}
486132579Srwatson
48713675Sdyson	/* Check metadata checksum. */
48813675Sdyson	for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
48913675Sdyson	    i < (meta->config_size / sizeof(uint32_t)); i++) {
49013907Sdyson		checksum += *ptr++;
49113675Sdyson	}
49213907Sdyson	checksum -= meta->checksum;
49313675Sdyson	if (checksum != meta->checksum) {
49413776Sdyson		G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
49576364Salfred		free(meta, M_MD_INTEL);
49691362Salfred		return (NULL);
49791362Salfred	}
49813675Sdyson
49991362Salfred	/* Validate metadata size. */
50091362Salfred	size = sizeof(struct intel_raid_conf) +
50176760Salfred	    sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
502124394Sdes	    sizeof(struct intel_raid_vol) * meta->total_volumes;
50376760Salfred	if (size > meta->config_size) {
50413675Sdysonbadsize:
50591362Salfred		G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
50676760Salfred		    meta->config_size, size);
50713675Sdyson		free(meta, M_MD_INTEL);
50813675Sdyson		return (NULL);
50913675Sdyson	}
51013675Sdyson	for (i = 0; i < meta->total_volumes; i++) {
51113675Sdyson		mvol = intel_get_volume(meta, i);
51213675Sdyson		mmap = intel_get_map(mvol, 0);
51313675Sdyson		size += 4 * (mmap->total_disks - 1);
51413675Sdyson		if (size > meta->config_size)
51513675Sdyson			goto badsize;
51676364Salfred		if (mvol->migr_state) {
51791362Salfred			size += sizeof(struct intel_raid_map);
518133049Ssilby			if (size > meta->config_size)
519133049Ssilby				goto badsize;
52091362Salfred			mmap = intel_get_map(mvol, 1);
52113675Sdyson			size += 4 * (mmap->total_disks - 1);
52213675Sdyson			if (size > meta->config_size)
52314177Sdyson				goto badsize;
52413675Sdyson		}
52513675Sdyson	}
52613675Sdyson
52714037Sdyson	/* Validate disk indexes. */
52814037Sdyson	for (i = 0; i < meta->total_volumes; i++) {
52914037Sdyson		mvol = intel_get_volume(meta, i);
53014037Sdyson		for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
53176364Salfred			mmap = intel_get_map(mvol, j);
532126252Srwatson			for (k = 0; k < mmap->total_disks; k++) {
53314037Sdyson				if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
534122352Stanimura				    meta->total_disks) {
535174647Sjeff					G_RAID_DEBUG(1, "Intel metadata disk"
536174647Sjeff					    " index %d too big (>%d)",
53714037Sdyson					    mmap->disk_idx[k] & INTEL_DI_IDX,
53841086Struckman					    meta->total_disks);
53995883Salfred					free(meta, M_MD_INTEL);
540133741Sjmg					return (NULL);
54114037Sdyson				}
54214037Sdyson			}
543126131Sgreen		}
544126131Sgreen	}
545126131Sgreen
546126131Sgreen	/* Validate migration types. */
547126131Sgreen	for (i = 0; i < meta->total_volumes; i++) {
548133790Ssilby		mvol = intel_get_volume(meta, i);
549126131Sgreen		if (mvol->migr_state &&
550133790Ssilby		    mvol->migr_type != INTEL_MT_INIT &&
551126131Sgreen		    mvol->migr_type != INTEL_MT_REBUILD &&
552126131Sgreen		    mvol->migr_type != INTEL_MT_VERIFY &&
553126131Sgreen		    mvol->migr_type != INTEL_MT_REPAIR) {
554133790Ssilby			G_RAID_DEBUG(1, "Intel metadata has unsupported"
555133790Ssilby			    " migration type %d", mvol->migr_type);
556133790Ssilby			free(meta, M_MD_INTEL);
557133790Ssilby			return (NULL);
558133790Ssilby		}
559133790Ssilby	}
560133790Ssilby
561133790Ssilby	return (meta);
562133790Ssilby}
563132579Srwatson
564126131Sgreenstatic int
565126131Sgreenintel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
56613675Sdyson{
56713675Sdyson	struct g_provider *pp;
568101941Srwatson	char *buf;
56913675Sdyson	int error, i, sectors;
57013675Sdyson	uint32_t checksum, *ptr;
571101941Srwatson
57283366Sjulian	pp = cp->provider;
57345311Sdt
57413675Sdyson	/* Recalculate checksum for case if metadata were changed. */
575109153Sdillon	meta->checksum = 0;
57647748Salc	for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
57713675Sdyson	    i < (meta->config_size / sizeof(uint32_t)); i++) {
57818863Sdyson		checksum += *ptr++;
57913675Sdyson	}
58091362Salfred	meta->checksum = checksum;
58113675Sdyson
58247748Salc	/* Create and fill buffer. */
58347748Salc	sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
58447748Salc	buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
58547748Salc	if (sectors > 1) {
586101768Srwatson		memcpy(buf, ((char *)meta) + pp->sectorsize,
587172930Srwatson		    (sectors - 1) * pp->sectorsize);
588101768Srwatson	}
589101768Srwatson	memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
590101768Srwatson
591133790Ssilby	error = g_write_data(cp,
592133790Ssilby	    pp->mediasize - pp->sectorsize * (1 + sectors),
593133790Ssilby	    buf, pp->sectorsize * sectors);
594133790Ssilby	if (error != 0) {
595133790Ssilby		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
596133790Ssilby		    pp->name, error);
597133790Ssilby	}
598133790Ssilby
599133790Ssilby	free(buf, M_MD_INTEL);
600133790Ssilby	return (error);
601101768Srwatson}
60213675Sdyson
60313907Sdysonstatic int
60413907Sdysonintel_meta_erase(struct g_consumer *cp)
60513907Sdyson{
60613675Sdyson	struct g_provider *pp;
60718863Sdyson	char *buf;
60813675Sdyson	int error;
60913675Sdyson
61018863Sdyson	pp = cp->provider;
61118863Sdyson	buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
61247748Salc	error = g_write_data(cp,
61391362Salfred	    pp->mediasize - 2 * pp->sectorsize,
614116127Smux	    buf, pp->sectorsize);
615116127Smux	if (error != 0) {
616116127Smux		G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
61791362Salfred		    pp->name, error);
61876760Salfred	}
61913675Sdyson	free(buf, M_MD_INTEL);
62076760Salfred	return (error);
62113675Sdyson}
62213675Sdyson
62313675Sdysonstatic int
62413675Sdysonintel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
62513675Sdyson{
62647748Salc	struct intel_raid_conf *meta;
62747748Salc	int error;
62847748Salc
62947748Salc	/* Fill anchor and single disk. */
63047748Salc	meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
63147748Salc	memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
63247748Salc	memcpy(&meta->version[0], INTEL_VERSION_1000,
63347748Salc	    sizeof(INTEL_VERSION_1000) - 1);
63447748Salc	meta->config_size = INTEL_MAX_MD_SIZE(1);
63547748Salc	meta->config_id = arc4random();
63613675Sdyson	meta->generation = 1;
63714037Sdyson	meta->total_disks = 1;
63813907Sdyson	meta->disk[0] = *d;
63913907Sdyson	error = intel_meta_write(cp, meta);
64013907Sdyson	free(meta, M_MD_INTEL);
64113907Sdyson	return (error);
64247748Salc}
64318863Sdyson
64418863Sdysonstatic struct g_raid_disk *
64547748Salcg_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
64691362Salfred{
647127501Salc	struct g_raid_disk	*disk;
648127501Salc	struct g_raid_md_intel_perdisk *pd;
64991362Salfred
65013907Sdyson	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
65113907Sdyson		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
65213907Sdyson		if (pd->pd_disk_pos == id)
65313907Sdyson			break;
65413907Sdyson	}
65513907Sdyson	return (disk);
65613907Sdyson}
65713907Sdyson
65813907Sdysonstatic int
65914037Sdysong_raid_md_intel_supported(int level, int qual, int disks, int force)
66013675Sdyson{
66113675Sdyson
66213675Sdyson	switch (level) {
66376760Salfred	case G_RAID_VOLUME_RL_RAID0:
66413675Sdyson		if (disks < 1)
66576760Salfred			return (0);
66613675Sdyson		if (!force && (disks < 2 || disks > 6))
66743623Sdillon			return (0);
66813675Sdyson		break;
66913675Sdyson	case G_RAID_VOLUME_RL_RAID1:
67013675Sdyson		if (disks < 1)
67113675Sdyson			return (0);
67213675Sdyson		if (!force && (disks != 2))
67313675Sdyson			return (0);
67413675Sdyson		break;
67543623Sdillon	case G_RAID_VOLUME_RL_RAID1E:
67643623Sdillon		if (disks < 2)
67747748Salc			return (0);
67843623Sdillon		if (!force && (disks != 4))
67947748Salc			return (0);
68013675Sdyson		break;
68116960Sdyson	case G_RAID_VOLUME_RL_RAID5:
68243623Sdillon		if (disks < 3)
683124394Sdes			return (0);
684116127Smux		if (!force && disks > 6)
685116127Smux			return (0);
68643623Sdillon		if (qual != G_RAID_VOLUME_RLQ_R5LA)
68747748Salc			return (0);
68843623Sdillon		break;
68913675Sdyson	default:
69047748Salc		return (0);
69147748Salc	}
69213675Sdyson	if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
69376760Salfred		return (0);
69447748Salc	return (1);
69576760Salfred}
69647748Salc
69791362Salfredstatic struct g_raid_volume *
69891362Salfredg_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
69977140Salfred{
70047748Salc	struct g_raid_volume	*mvol;
70113675Sdyson
70247748Salc	TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
70347748Salc		if ((intptr_t)(mvol->v_md_data) == id)
70413675Sdyson			break;
70513675Sdyson	}
706101768Srwatson	return (mvol);
707101768Srwatson}
708101768Srwatson
70947748Salcstatic int
71013675Sdysong_raid_md_intel_start_disk(struct g_raid_disk *disk)
71191362Salfred{
71224101Sbde	struct g_raid_softc *sc;
71355112Sbde	struct g_raid_subdisk *sd, *tmpsd;
71447748Salc	struct g_raid_disk *olddisk, *tmpdisk;
71547748Salc	struct g_raid_md_object *md;
71613913Sdyson	struct g_raid_md_intel_object *mdi;
71747748Salc	struct g_raid_md_intel_perdisk *pd, *oldpd;
71847748Salc	struct intel_raid_conf *meta;
71947748Salc	struct intel_raid_vol *mvol;
72013675Sdyson	struct intel_raid_map *mmap0, *mmap1;
72113675Sdyson	int disk_pos, resurrection = 0;
72213675Sdyson
72313675Sdyson	sc = disk->d_softc;
72413675Sdyson	md = sc->sc_md;
72547748Salc	mdi = (struct g_raid_md_intel_object *)md;
72613675Sdyson	meta = mdi->mdio_meta;
72713675Sdyson	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
72813675Sdyson	olddisk = NULL;
72913675Sdyson
73013675Sdyson	/* Find disk position in metadata by it's serial. */
73113675Sdyson	disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
73214037Sdyson	if (disk_pos < 0) {
73314802Sdyson		G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
73414037Sdyson		/* Failed stale disk is useless for us. */
73514037Sdyson		if (pd->pd_disk_meta.flags & INTEL_F_FAILED) {
73691362Salfred			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
73776760Salfred			return (0);
73813675Sdyson		}
73913675Sdyson		/* If we are in the start process, that's all for now. */
74014037Sdyson		if (!mdi->mdio_started)
74113907Sdyson			goto nofit;
74213907Sdyson		/*
74313907Sdyson		 * If we have already started - try to get use of the disk.
74413907Sdyson		 * Try to replace OFFLINE disks first, then FAILED.
74513675Sdyson		 */
74613907Sdyson		TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
74713907Sdyson			if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
74813675Sdyson			    tmpdisk->d_state != G_RAID_DISK_S_FAILED)
74913675Sdyson				continue;
750119872Salc			/* Make sure this disk is big enough. */
75118863Sdyson			TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
752119872Salc				off_t disk_sectors =
753112569Sjake				    intel_get_disk_sectors(&pd->pd_disk_meta);
75413907Sdyson
75591412Salfred				if (sd->sd_offset + sd->sd_size + 4096 >
756133790Ssilby				    disk_sectors * 512) {
757133790Ssilby					G_RAID_DEBUG1(1, sc,
75879224Sdillon					    "Disk too small (%llu < %llu)",
75918863Sdyson					    (unsigned long long)
76013907Sdyson					    disk_sectors * 512,
76113907Sdyson					    (unsigned long long)
76213907Sdyson					    sd->sd_offset + sd->sd_size + 4096);
763119872Salc					break;
76440286Sdg				}
76576760Salfred			}
766193893Scperciva			if (sd != NULL)
767193893Scperciva				continue;
76876760Salfred			if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
76999899Salc				olddisk = tmpdisk;
77099899Salc				break;
77199899Salc			} else if (olddisk == NULL)
77299899Salc				olddisk = tmpdisk;
77399899Salc		}
774119872Salc		if (olddisk == NULL) {
775119872Salcnofit:
77699899Salc			if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
777119872Salc				g_raid_change_disk_state(disk,
778118757Salc				    G_RAID_DISK_S_SPARE);
77999899Salc				return (1);
78076760Salfred			} else {
78113907Sdyson				g_raid_change_disk_state(disk,
782120000Salc				    G_RAID_DISK_S_STALE);
783120000Salc				return (0);
784119872Salc			}
785119872Salc		}
78613907Sdyson		oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
78713907Sdyson		disk_pos = oldpd->pd_disk_pos;
78813907Sdyson		resurrection = 1;
78913907Sdyson	}
79013907Sdyson
79113907Sdyson	if (olddisk == NULL) {
79276760Salfred		/* Find placeholder by position. */
79376760Salfred		olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
79413907Sdyson		if (olddisk == NULL)
79513907Sdyson			panic("No disk at position %d!", disk_pos);
79613907Sdyson		if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
79713907Sdyson			G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
79813907Sdyson			    disk_pos);
79913907Sdyson			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
80013907Sdyson			return (0);
801104908Smike		}
80213907Sdyson		oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
80313907Sdyson	}
80413907Sdyson
80513907Sdyson	/* Replace failed disk or placeholder with new disk. */
80676760Salfred	TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
80713907Sdyson		TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
80813907Sdyson		TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
80913907Sdyson		sd->sd_disk = disk;
81013907Sdyson	}
81113907Sdyson	oldpd->pd_disk_pos = -2;
81213907Sdyson	pd->pd_disk_pos = disk_pos;
81313907Sdyson
81476760Salfred	/* If it was placeholder -- destroy it. */
81513907Sdyson	if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
81613907Sdyson		g_raid_destroy_disk(olddisk);
81776364Salfred	} else {
818127501Salc		/* Otherwise, make it STALE_FAILED. */
81999899Salc		g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
820117325Ssilby		/* Update global metadata just in case. */
821118757Salc		memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
822117325Ssilby		    sizeof(struct intel_raid_disk));
82399899Salc	}
82491653Stanimura
82513907Sdyson	/* Welcome the new disk. */
82613907Sdyson	if (resurrection)
82713907Sdyson		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
82813907Sdyson	else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
82913907Sdyson		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
83013907Sdyson	else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
83113907Sdyson		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
83213907Sdyson	else
83313907Sdyson		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
83476364Salfred	TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
83513907Sdyson		mvol = intel_get_volume(meta,
836127501Salc		    (uintptr_t)(sd->sd_volume->v_md_data));
837127501Salc		mmap0 = intel_get_map(mvol, 0);
83813907Sdyson		if (mvol->migr_state)
83913907Sdyson			mmap1 = intel_get_map(mvol, 1);
84013907Sdyson		else
84191362Salfred			mmap1 = mmap0;
84213907Sdyson
84313907Sdyson		if (resurrection) {
84413907Sdyson			/* Stale disk, almost same as new. */
84513907Sdyson			g_raid_change_subdisk_state(sd,
84613907Sdyson			    G_RAID_SUBDISK_S_NEW);
84713907Sdyson		} else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
84813907Sdyson			/* Failed disk, almost useless. */
84913907Sdyson			g_raid_change_subdisk_state(sd,
850119811Salc			    G_RAID_SUBDISK_S_FAILED);
851127501Salc		} else if (mvol->migr_state == 0) {
852127501Salc			if (mmap0->status == INTEL_S_UNINITIALIZED) {
853127501Salc				/* Freshly created uninitialized volume. */
854127501Salc				g_raid_change_subdisk_state(sd,
855127501Salc				    G_RAID_SUBDISK_S_UNINITIALIZED);
856127501Salc			} else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
857127501Salc				/* Freshly inserted disk. */
858127501Salc				g_raid_change_subdisk_state(sd,
859127501Salc				    G_RAID_SUBDISK_S_NEW);
860127501Salc			} else if (mvol->dirty) {
861127501Salc				/* Dirty volume (unclean shutdown). */
86213907Sdyson				g_raid_change_subdisk_state(sd,
86313907Sdyson				    G_RAID_SUBDISK_S_STALE);
86413907Sdyson			} else {
86513907Sdyson				/* Up to date disk. */
86613907Sdyson				g_raid_change_subdisk_state(sd,
86713907Sdyson				    G_RAID_SUBDISK_S_ACTIVE);
86813907Sdyson			}
86913907Sdyson		} else if (mvol->migr_type == INTEL_MT_INIT ||
87013907Sdyson			   mvol->migr_type == INTEL_MT_REBUILD) {
87113907Sdyson			if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
87213907Sdyson				/* Freshly inserted disk. */
87313907Sdyson				g_raid_change_subdisk_state(sd,
87413907Sdyson				    G_RAID_SUBDISK_S_NEW);
87513907Sdyson			} else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
87613907Sdyson				/* Rebuilding disk. */
87713907Sdyson				g_raid_change_subdisk_state(sd,
87876364Salfred				    G_RAID_SUBDISK_S_REBUILD);
87913951Sdyson				if (mvol->dirty) {
88091362Salfred					sd->sd_rebuild_pos = 0;
881133049Ssilby				} else {
882133049Ssilby					sd->sd_rebuild_pos =
883133049Ssilby					    intel_get_vol_curr_migr_unit(mvol) *
884133049Ssilby					    sd->sd_volume->v_strip_size *
885133049Ssilby					    mmap0->total_domains;
886133049Ssilby				}
887133049Ssilby			} else if (mvol->dirty) {
88813907Sdyson				/* Dirty volume (unclean shutdown). */
88976760Salfred				g_raid_change_subdisk_state(sd,
89013951Sdyson				    G_RAID_SUBDISK_S_STALE);
89113951Sdyson			} else {
89213951Sdyson				/* Up to date disk. */
893173750Sdumbbell				g_raid_change_subdisk_state(sd,
89413992Sdyson				    G_RAID_SUBDISK_S_ACTIVE);
895133049Ssilby			}
89691362Salfred		} else if (mvol->migr_type == INTEL_MT_VERIFY ||
89791362Salfred			   mvol->migr_type == INTEL_MT_REPAIR) {
89814802Sdyson			if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
89913907Sdyson				/* Freshly inserted disk. */
900133049Ssilby				g_raid_change_subdisk_state(sd,
901133049Ssilby				    G_RAID_SUBDISK_S_NEW);
90213907Sdyson			} else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
90313907Sdyson				/* Resyncing disk. */
90413951Sdyson				g_raid_change_subdisk_state(sd,
90576760Salfred				    G_RAID_SUBDISK_S_RESYNC);
90613951Sdyson				if (mvol->dirty) {
90713951Sdyson					sd->sd_rebuild_pos = 0;
90813951Sdyson				} else {
909173750Sdumbbell					sd->sd_rebuild_pos =
91013992Sdyson					    intel_get_vol_curr_migr_unit(mvol) *
911133049Ssilby					    sd->sd_volume->v_strip_size *
91291362Salfred					    mmap0->total_domains;
91391362Salfred				}
91414802Sdyson			} else if (mvol->dirty) {
91513907Sdyson				/* Dirty volume (unclean shutdown). */
916133049Ssilby				g_raid_change_subdisk_state(sd,
917133049Ssilby				    G_RAID_SUBDISK_S_STALE);
91813907Sdyson			} else {
91913907Sdyson				/* Up to date disk. */
92013951Sdyson				g_raid_change_subdisk_state(sd,
92113951Sdyson				    G_RAID_SUBDISK_S_ACTIVE);
922119872Salc			}
92313907Sdyson		}
924119872Salc		g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
92513907Sdyson		    G_RAID_EVENT_SUBDISK);
92613907Sdyson	}
927133049Ssilby
92813907Sdyson	/* Update status of our need for spare. */
92913907Sdyson	if (mdi->mdio_started) {
93013907Sdyson		mdi->mdio_incomplete =
93113907Sdyson		    (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
93213907Sdyson		     meta->total_disks);
93313907Sdyson	}
93413907Sdyson
935112981Shsu	return (resurrection);
93613907Sdyson}
93714802Sdyson
93814802Sdysonstatic void
93913907Sdysong_disk_md_intel_retaste(void *arg, int pending)
94013992Sdyson{
94113992Sdyson
94213992Sdyson	G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
94313992Sdyson	g_retaste(&g_raid_class);
94414037Sdyson	free(arg, M_MD_INTEL);
945133049Ssilby}
94691362Salfred
94791362Salfredstatic void
948133049Ssilbyg_raid_md_intel_refill(struct g_raid_softc *sc)
94913907Sdyson{
95013907Sdyson	struct g_raid_md_object *md;
951126131Sgreen	struct g_raid_md_intel_object *mdi;
952126131Sgreen	struct intel_raid_conf *meta;
95313907Sdyson	struct g_raid_disk *disk;
95413907Sdyson	struct task *task;
95513907Sdyson	int update, na;
95613907Sdyson
95713907Sdyson	md = sc->sc_md;
95813907Sdyson	mdi = (struct g_raid_md_intel_object *)md;
95913907Sdyson	meta = mdi->mdio_meta;
96013907Sdyson	update = 0;
96113907Sdyson	do {
96213907Sdyson		/* Make sure we miss anything. */
96376760Salfred		na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
96413907Sdyson		if (na == meta->total_disks)
96513907Sdyson			break;
96613907Sdyson
96776760Salfred		G_RAID_DEBUG1(1, md->mdo_softc,
96813907Sdyson		    "Array is not complete (%d of %d), "
96914037Sdyson		    "trying to refill.", na, meta->total_disks);
970124394Sdes
97116960Sdyson		/* Try to get use some of STALE disks. */
972101941Srwatson		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
97316960Sdyson			if (disk->d_state == G_RAID_DISK_S_STALE) {
97413907Sdyson				update += g_raid_md_intel_start_disk(disk);
975101941Srwatson				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
97683366Sjulian					break;
97745311Sdt			}
97813907Sdyson		}
97913675Sdyson		if (disk != NULL)
980133790Ssilby			continue;
98116960Sdyson
98216960Sdyson		/* Try to get use some of SPARE disks. */
983109153Sdillon		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
98416960Sdyson			if (disk->d_state == G_RAID_DISK_S_SPARE) {
98516960Sdyson				update += g_raid_md_intel_start_disk(disk);
98691395Salfred				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
987133049Ssilby					break;
988133049Ssilby			}
989133049Ssilby		}
990133049Ssilby	} while (disk != NULL);
991133049Ssilby
99213675Sdyson	/* Write new metadata if we changed something. */
99313675Sdyson	if (update) {
99413675Sdyson		g_raid_md_write_intel(md, NULL, NULL, NULL);
995179243Skib		meta = mdi->mdio_meta;
996179243Skib	}
997133049Ssilby
99891395Salfred	/* Update status of our need for spare. */
99976760Salfred	mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
100013675Sdyson	    meta->total_disks);
1001101768Srwatson
1002172930Srwatson	/* Request retaste hoping to find spare. */
1003101768Srwatson	if (mdi->mdio_incomplete) {
1004133049Ssilby		task = malloc(sizeof(struct task),
1005101768Srwatson		    M_MD_INTEL, M_WAITOK | M_ZERO);
1006101768Srwatson		TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
1007101768Srwatson		taskqueue_enqueue(taskqueue_swi, task);
1008101768Srwatson	}
100977676Sdillon}
101013675Sdyson
1011133790Ssilbystatic void
1012133790Ssilbyg_raid_md_intel_start(struct g_raid_softc *sc)
1013133790Ssilby{
1014133790Ssilby	struct g_raid_md_object *md;
1015133790Ssilby	struct g_raid_md_intel_object *mdi;
1016133790Ssilby	struct g_raid_md_intel_perdisk *pd;
1017133790Ssilby	struct intel_raid_conf *meta;
1018133790Ssilby	struct intel_raid_vol *mvol;
1019133790Ssilby	struct intel_raid_map *mmap;
1020133790Ssilby	struct g_raid_volume *vol;
1021133790Ssilby	struct g_raid_subdisk *sd;
102217163Sdyson	struct g_raid_disk *disk;
1023133790Ssilby	int i, j, disk_pos;
1024133790Ssilby
1025133790Ssilby	md = sc->sc_md;
1026133790Ssilby	mdi = (struct g_raid_md_intel_object *)md;
1027133790Ssilby	meta = mdi->mdio_meta;
1028133790Ssilby
1029133790Ssilby	/* Create volumes and subdisks. */
1030133790Ssilby	for (i = 0; i < meta->total_volumes; i++) {
1031133790Ssilby		mvol = intel_get_volume(meta, i);
1032133790Ssilby		mmap = intel_get_map(mvol, 0);
1033133049Ssilby		vol = g_raid_create_volume(sc, mvol->name, -1);
1034133790Ssilby		vol->v_md_data = (void *)(intptr_t)i;
1035133049Ssilby		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
103613907Sdyson		if (mmap->type == INTEL_T_RAID0)
1037133790Ssilby			vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
1038133790Ssilby		else if (mmap->type == INTEL_T_RAID1 &&
1039133790Ssilby		    mmap->total_domains >= 2 &&
1040133790Ssilby		    mmap->total_domains <= mmap->total_disks) {
1041133790Ssilby			/* Assume total_domains is correct. */
1042133790Ssilby			if (mmap->total_domains == mmap->total_disks)
1043133790Ssilby				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1044133790Ssilby			else
1045133790Ssilby				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1046133790Ssilby		} else if (mmap->type == INTEL_T_RAID1) {
1047133790Ssilby			/* total_domains looks wrong. */
104877676Sdillon			if (mmap->total_disks <= 2)
1049133049Ssilby				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1050124394Sdes			else
105113913Sdyson				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
105277676Sdillon		} else if (mmap->type == INTEL_T_RAID5) {
105313675Sdyson			vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
105413907Sdyson			vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
105576760Salfred		} else
1056133049Ssilby			vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
1057133049Ssilby		vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
1058133049Ssilby		vol->v_disks_count = mmap->total_disks;
1059133049Ssilby		vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
1060133049Ssilby		vol->v_sectorsize = 512; //ZZZ
1061133049Ssilby		for (j = 0; j < vol->v_disks_count; j++) {
106214037Sdyson			sd = &vol->v_subdisks[j];
106313907Sdyson			sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
106413907Sdyson			sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
106513907Sdyson		}
106616416Sdyson		g_raid_start_volume(vol);
106716416Sdyson	}
106858505Sdillon
106958505Sdillon	/* Create disk placeholders to store data for later writing. */
107058505Sdillon	for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
107113907Sdyson		pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1072165347Spjd		pd->pd_disk_pos = disk_pos;
1073165347Spjd		pd->pd_disk_meta = meta->disk[disk_pos];
1074165347Spjd		disk = g_raid_create_disk(sc);
1075127501Salc		disk->d_md_data = (void *)pd;
1076133049Ssilby		disk->d_state = G_RAID_DISK_S_OFFLINE;
1077105009Salfred		for (i = 0; i < meta->total_volumes; i++) {
107876760Salfred			mvol = intel_get_volume(meta, i);
107913907Sdyson			mmap = intel_get_map(mvol, 0);
108013907Sdyson			for (j = 0; j < mmap->total_disks; j++) {
108191362Salfred				if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
108214037Sdyson					break;
108313907Sdyson			}
108413907Sdyson			if (j == mmap->total_disks)
108513907Sdyson				continue;
108613907Sdyson			vol = g_raid_md_intel_get_volume(sc, i);
108713907Sdyson			sd = &vol->v_subdisks[j];
108858505Sdillon			sd->sd_disk = disk;
108958505Sdillon			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
109013907Sdyson		}
1091133049Ssilby	}
109213992Sdyson
109313992Sdyson	/* Make all disks found till the moment take their places. */
109413992Sdyson	do {
109513992Sdyson		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1096173750Sdumbbell			if (disk->d_state == G_RAID_DISK_S_NONE) {
1097173750Sdumbbell				g_raid_md_intel_start_disk(disk);
1098133049Ssilby				break;
109991395Salfred			}
110091362Salfred		}
110113907Sdyson	} while (disk != NULL);
110213907Sdyson
1103133049Ssilby	mdi->mdio_started = 1;
1104133049Ssilby	G_RAID_DEBUG1(0, sc, "Array started.");
110513907Sdyson	g_raid_md_write_intel(md, NULL, NULL, NULL);
110613907Sdyson
110713907Sdyson	/* Pickup any STALE/SPARE disks to refill array if needed. */
110814644Sdyson	g_raid_md_intel_refill(sc);
110914644Sdyson
111013913Sdyson	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
111113913Sdyson		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
111213907Sdyson		    G_RAID_EVENT_VOLUME);
1113118230Spb	}
1114133049Ssilby
1115133049Ssilby	callout_stop(&mdi->mdio_start_co);
111676760Salfred	G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
1117133049Ssilby	root_mount_rel(mdi->mdio_rootmount);
1118133049Ssilby	mdi->mdio_rootmount = NULL;
1119133049Ssilby}
1120133049Ssilby
1121133049Ssilbystatic void
1122133049Ssilbyg_raid_md_intel_new_disk(struct g_raid_disk *disk)
1123133049Ssilby{
1124133049Ssilby	struct g_raid_softc *sc;
1125133049Ssilby	struct g_raid_md_object *md;
1126133049Ssilby	struct g_raid_md_intel_object *mdi;
1127133049Ssilby	struct intel_raid_conf *pdmeta;
1128133049Ssilby	struct g_raid_md_intel_perdisk *pd;
1129133049Ssilby
1130133049Ssilby	sc = disk->d_softc;
1131133049Ssilby	md = sc->sc_md;
1132133049Ssilby	mdi = (struct g_raid_md_intel_object *)md;
1133133049Ssilby	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1134133049Ssilby	pdmeta = pd->pd_meta;
1135133049Ssilby
113654534Stegge	if (mdi->mdio_started) {
1137133049Ssilby		if (g_raid_md_intel_start_disk(disk))
1138133049Ssilby			g_raid_md_write_intel(md, NULL, NULL, NULL);
1139133049Ssilby	} else {
1140133049Ssilby		/* If we haven't started yet - check metadata freshness. */
1141133049Ssilby		if (mdi->mdio_meta == NULL ||
1142133049Ssilby		    ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
1143133049Ssilby			G_RAID_DEBUG1(1, sc, "Newer disk");
1144133049Ssilby			if (mdi->mdio_meta != NULL)
1145133049Ssilby				free(mdi->mdio_meta, M_MD_INTEL);
1146133049Ssilby			mdi->mdio_meta = intel_meta_copy(pdmeta);
1147133049Ssilby			mdi->mdio_generation = mdi->mdio_meta->generation;
114854534Stegge			mdi->mdio_disks_present = 1;
1149133049Ssilby		} else if (pdmeta->generation == mdi->mdio_generation) {
1150133049Ssilby			mdi->mdio_disks_present++;
1151133049Ssilby			G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
115254534Stegge			    mdi->mdio_disks_present,
1153124394Sdes			    mdi->mdio_meta->total_disks);
115491395Salfred		} else {
1155133049Ssilby			G_RAID_DEBUG1(1, sc, "Older disk");
1156133049Ssilby		}
1157133049Ssilby		/* If we collected all needed disks - start array. */
115891395Salfred		if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
1159133049Ssilby			g_raid_md_intel_start(sc);
1160133049Ssilby	}
1161133049Ssilby}
1162133049Ssilby
1163133049Ssilbystatic void
1164133049Ssilbyg_raid_intel_go(void *arg)
1165133049Ssilby{
1166133049Ssilby	struct g_raid_softc *sc;
1167133049Ssilby	struct g_raid_md_object *md;
1168133049Ssilby	struct g_raid_md_intel_object *mdi;
116954534Stegge
1170124394Sdes	sc = arg;
1171133049Ssilby	md = sc->sc_md;
1172133049Ssilby	mdi = (struct g_raid_md_intel_object *)md;
1173133049Ssilby	if (!mdi->mdio_started) {
1174133049Ssilby		G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
117513675Sdyson		g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
1176133049Ssilby	}
1177153484Sdelphij}
1178153484Sdelphij
117913675Sdysonstatic int
118013675Sdysong_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
118113675Sdyson    struct g_geom **gp)
118213675Sdyson{
118313675Sdyson	struct g_raid_softc *sc;
118413675Sdyson	struct g_raid_md_intel_object *mdi;
118513675Sdyson	char name[16];
118613675Sdyson
118714037Sdyson	mdi = (struct g_raid_md_intel_object *)md;
118813675Sdyson	mdi->mdio_config_id = arc4random();
118913675Sdyson	mdi->mdio_generation = 0;
119013675Sdyson	snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
119116960Sdyson	sc = g_raid_create_node(mp, name, md);
119213907Sdyson	if (sc == NULL)
1193133049Ssilby		return (G_RAID_MD_TASTE_FAIL);
119413675Sdyson	md->mdo_softc = sc;
119513675Sdyson	*gp = sc->sc_geom;
119613907Sdyson	return (G_RAID_MD_TASTE_NEW);
119714037Sdyson}
119814037Sdyson
/*
 * Return the last N characters of the serial label.  The Linux and
 * ataraid(7) code always uses the last 16 characters of the label to
 * store into the Intel meta format.  Generalize this to N characters
 * since that's easy.  Labels can be up to 20 characters for SATA drives
 * and up to 251 characters for SAS drives.  Since Intel controllers don't
 * support SAS drives, just stick with the SATA limits for stack friendliness.
 *
 * cp is the consumer to query for "GEOM::ident"; serial receives serlen
 * bytes.  The copy is done with strncpy(), so a label shorter than serlen
 * is zero-padded and a label of serlen or more characters leaves serial
 * without a terminating NUL: callers must treat it as a fixed-width field.
 *
 * Returns 0 on success or the error reported by g_io_getattr().
 */
static int
g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
{
	char serial_buffer[24] = { 0 };
	int len, error;

	len = sizeof(serial_buffer);
	error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
	if (error != 0)
		return (error);
	/*
	 * Nothing here guarantees that the attribute came back
	 * NUL-terminated; make sure strlen() cannot run off the buffer.
	 */
	serial_buffer[sizeof(serial_buffer) - 1] = '\0';
	len = strlen(serial_buffer);
	/* Skip the leading part of labels longer than serlen. */
	if (len > serlen)
		len -= serlen;
	else
		len = 0;
	strncpy(serial, serial_buffer + len, serlen);
	return (0);
}
122513675Sdyson
122613675Sdysonstatic int
122713675Sdysong_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
122813909Sdyson                              struct g_consumer *cp, struct g_geom **gp)
122913909Sdyson{
123013909Sdyson	struct g_consumer *rcp;
123113909Sdyson	struct g_provider *pp;
123213907Sdyson	struct g_raid_md_intel_object *mdi, *mdi1;
123377676Sdillon	struct g_raid_softc *sc;
123477676Sdillon	struct g_raid_disk *disk;
123513907Sdyson	struct intel_raid_conf *meta;
123677676Sdillon	struct g_raid_md_intel_perdisk *pd;
123713913Sdyson	struct g_geom *geom;
123824101Sbde	int error, disk_pos, result, spare, len;
123955112Sbde	char serial[INTEL_SERIAL_LEN];
124024101Sbde	char name[16];
124114037Sdyson	uint16_t vendor;
124214037Sdyson
124329356Speter	G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
124414037Sdyson	mdi = (struct g_raid_md_intel_object *)md;
124514177Sdyson	pp = cp->provider;
124614037Sdyson
124713907Sdyson	/* Read metadata from device. */
1248133049Ssilby	meta = NULL;
124991395Salfred	vendor = 0xffff;
125076760Salfred	disk_pos = 0;
125113675Sdyson	if (g_access(cp, 1, 0, 0) != 0)
125213675Sdyson		return (G_RAID_MD_TASTE_FAIL);
1253175140Sjhb	g_topology_unlock();
1254175140Sjhb	error = g_raid_md_get_label(cp, serial, sizeof(serial));
1255175140Sjhb	if (error != 0) {
1256175140Sjhb		G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
1257175140Sjhb		    pp->name, error);
1258175140Sjhb		goto fail2;
1259175140Sjhb	}
1260175140Sjhb	len = 2;
1261175140Sjhb	if (pp->geom->rank == 1)
1262175140Sjhb		g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
1263175140Sjhb	meta = intel_meta_read(cp);
1264175140Sjhb	g_topology_lock();
126513675Sdyson	g_access(cp, -1, 0, 0);
126613675Sdyson	if (meta == NULL) {
126713675Sdyson		if (g_raid_aggressive_spare) {
1268104094Sphk			if (vendor != 0x8086) {
1269102003Srwatson				G_RAID_DEBUG(1,
127013675Sdyson				    "Intel vendor mismatch 0x%04x != 0x8086",
127136735Sdfr				    vendor);
127299009Salfred			} else {
1273102003Srwatson				G_RAID_DEBUG(1,
127483366Sjulian				    "No Intel metadata, forcing spare.");
127513675Sdyson				spare = 2;
1276109153Sdillon				goto search;
1277101768Srwatson			}
127813675Sdyson		}
1279104269Srwatson		return (G_RAID_MD_TASTE_FAIL);
1280104269Srwatson	}
1281104269Srwatson
1282172930Srwatson	/* Check this disk position in obtained metadata. */
1283121970Srwatson	disk_pos = intel_meta_find_disk(meta, serial);
1284121970Srwatson	if (disk_pos < 0) {
1285101768Srwatson		G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
1286121970Srwatson		goto fail1;
1287101768Srwatson	}
1288101768Srwatson	if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
1289137752Sphk	    (pp->mediasize / pp->sectorsize)) {
129013675Sdyson		G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
129113675Sdyson		    intel_get_disk_sectors(&meta->disk[disk_pos]),
129213675Sdyson		    (off_t)(pp->mediasize / pp->sectorsize));
1293137752Sphk		goto fail1;
129413675Sdyson	}
129513675Sdyson
129613675Sdyson	/* Metadata valid. Print it. */
129713675Sdyson	g_raid_md_intel_print(meta);
129813675Sdyson	G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
129913675Sdyson	spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
130013675Sdyson
1301137752Sphksearch:
130213675Sdyson	/* Search for matching node. */
130313675Sdyson	sc = NULL;
130414037Sdyson	mdi1 = NULL;
130514037Sdyson	LIST_FOREACH(geom, &mp->geom, geom) {
130614037Sdyson		sc = geom->softc;
130714037Sdyson		if (sc == NULL)
1308137752Sphk			continue;
130913675Sdyson		if (sc->sc_stopping != 0)
131041086Struckman			continue;
1311138032Srwatson		if (sc->sc_md->mdo_class != md->mdo_class)
1312137752Sphk			continue;
1313138032Srwatson		mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
131441086Struckman		if (spare) {
131541086Struckman			if (mdi1->mdio_incomplete)
1316104393Struckman				break;
1317137752Sphk		} else {
131813675Sdyson			if (mdi1->mdio_config_id == meta->config_id)
131941086Struckman				break;
132041086Struckman		}
1321138032Srwatson	}
1322137752Sphk
1323138032Srwatson	/* Found matching node. */
132441086Struckman	if (geom != NULL) {
132541086Struckman		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
132618863Sdyson		result = G_RAID_MD_TASTE_EXISTING;
1327104393Struckman
1328137752Sphk	} else if (spare) { /* Not found needy node -- left for later. */
132913675Sdyson		G_RAID_DEBUG(1, "Spare is not needed at this time");
1330137752Sphk		goto fail1;
1331137752Sphk
1332137764Sphk	} else { /* Not found matching node -- create one. */
133313675Sdyson		result = G_RAID_MD_TASTE_NEW;
1334104269Srwatson		mdi->mdio_config_id = meta->config_id;
1335138032Srwatson		snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
1336137752Sphk		sc = g_raid_create_node(mp, name, md);
133713675Sdyson		md->mdo_softc = sc;
133813675Sdyson		geom = sc->sc_geom;
1339104094Sphk		callout_init(&mdi->mdio_start_co, 1);
1340101983Srwatson		callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
134113675Sdyson		    g_raid_intel_go, sc);
134229356Speter		mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
1343101983Srwatson		G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
134483366Sjulian	}
134513675Sdyson
1346109153Sdillon	rcp = g_new_consumer(geom);
134713675Sdyson	g_attach(rcp, pp);
134829356Speter	if (g_access(rcp, 1, 1, 1) != 0)
1349101768Srwatson		; //goto fail1;
1350101768Srwatson
1351101768Srwatson	g_topology_unlock();
135213675Sdyson	sx_xlock(&sc->sc_lock);
135313675Sdyson
135491362Salfred	pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1355101768Srwatson	pd->pd_meta = meta;
1356172930Srwatson	pd->pd_disk_pos = -1;
1357101768Srwatson	if (spare == 2) {
1358101768Srwatson		memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
1359101768Srwatson		intel_set_disk_sectors(&pd->pd_disk_meta,
136029356Speter		    pp->mediasize / pp->sectorsize);
136129356Speter		pd->pd_disk_meta.id = 0;
1362195423Skib		pd->pd_disk_meta.flags = INTEL_F_SPARE;
136329356Speter	} else {
136413675Sdyson		pd->pd_disk_meta = meta->disk[disk_pos];
136529356Speter	}
1366179243Skib	disk = g_raid_create_disk(sc);
1367179243Skib	disk->d_md_data = (void *)pd;
136843311Sdillon	disk->d_consumer = rcp;
136943311Sdillon	rcp->private = disk;
137029356Speter
137113675Sdyson	/* Read kernel dumping information. */
1372195423Skib	disk->d_kd.offset = 0;
1373195423Skib	disk->d_kd.length = OFF_MAX;
1374195423Skib	len = sizeof(disk->d_kd);
1375195423Skib	error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
1376195423Skib	if (disk->d_kd.di.dumper == NULL)
1377195423Skib		G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.",
1378195423Skib		    rcp->provider->name, error);
1379195423Skib
138029356Speter	g_raid_md_intel_new_disk(disk);
138129356Speter
138229356Speter	sx_xunlock(&sc->sc_lock);
138383805Sjhb	g_topology_lock();
1384174647Sjeff	*gp = geom;
1385174647Sjeff	return (result);
138613675Sdysonfail2:
138713675Sdyson	g_topology_lock();
138829356Speter	g_access(cp, -1, 0, 0);
138983805Sjhbfail1:
1390174647Sjeff	free(meta, M_MD_INTEL);
1391174647Sjeff	return (G_RAID_MD_TASTE_FAIL);
139213907Sdyson}
139313675Sdyson
1394101768Srwatsonstatic int
1395101768Srwatsong_raid_md_event_intel(struct g_raid_md_object *md,
1396101768Srwatson    struct g_raid_disk *disk, u_int event)
139791362Salfred{
139829356Speter	struct g_raid_softc *sc;
139929356Speter	struct g_raid_subdisk *sd;
140013675Sdyson	struct g_raid_md_intel_object *mdi;
140113675Sdyson	struct g_raid_md_intel_perdisk *pd;
140298989Salfred
140398989Salfred	sc = md->mdo_softc;
140498989Salfred	mdi = (struct g_raid_md_intel_object *)md;
140598989Salfred	if (disk == NULL) {
140652983Speter		switch (event) {
1407101983Srwatson		case G_RAID_NODE_E_START:
140852983Speter			if (!mdi->mdio_started)
140952983Speter				g_raid_md_intel_start(sc);
1410101983Srwatson			return (0);
141183366Sjulian		}
141213675Sdyson		return (-1);
1413109153Sdillon	}
1414101768Srwatson	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1415101768Srwatson	switch (event) {
141652983Speter	case G_RAID_DISK_E_DISCONNECTED:
1417104269Srwatson		/* If disk was assigned, just update statuses. */
1418172930Srwatson		if (pd->pd_disk_pos >= 0) {
1419104269Srwatson			g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1420101768Srwatson			if (disk->d_consumer) {
1421101768Srwatson				g_raid_kill_consumer(sc, disk->d_consumer);
1422101768Srwatson				disk->d_consumer = NULL;
1423100527Salfred			}
142417124Sbde			TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1425133790Ssilby				g_raid_change_subdisk_state(sd,
1426132436Ssilby				    G_RAID_SUBDISK_S_NONE);
1427132436Ssilby				g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1428132436Ssilby				    G_RAID_EVENT_SUBDISK);
1429132436Ssilby			}
143013675Sdyson		} else {
1431205792Sed			/* Otherwise -- delete. */
1432205792Sed			g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
1433205792Sed			g_raid_destroy_disk(disk);
143460404Schris		}
143560404Schris
143617124Sbde		/* Write updated metadata to all disks. */
143760404Schris		g_raid_md_write_intel(md, NULL, NULL, NULL);
143817124Sbde
143917124Sbde		/* Check if anything left except placeholders. */
144076760Salfred		if (g_raid_ndisks(sc, -1) ==
144113675Sdyson		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
144213675Sdyson			g_raid_destroy_node(sc, 0);
144313675Sdyson		else
144413675Sdyson			g_raid_md_intel_refill(sc);
144583366Sjulian		return (0);
144613675Sdyson	}
144783366Sjulian	return (-2);
144813675Sdyson}
1449109153Sdillon
145016322Sgpalmerstatic int
145149413Sgreeng_raid_md_ctl_intel(struct g_raid_md_object *md,
1452109153Sdillon    struct gctl_req *req)
145396122Salfred{
145413675Sdyson	struct g_raid_softc *sc;
145576760Salfred	struct g_raid_volume *vol, *vol1;
145613675Sdyson	struct g_raid_subdisk *sd;
145713675Sdyson	struct g_raid_disk *disk;
145876364Salfred	struct g_raid_md_intel_object *mdi;
145976364Salfred	struct g_raid_md_intel_perdisk *pd;
146076364Salfred	struct g_consumer *cp;
146176364Salfred	struct g_provider *pp;
146291412Salfred	char arg[16], serial[INTEL_SERIAL_LEN];
1463125293Srwatson	const char *verb, *volname, *levelname, *diskname;
1464125293Srwatson	char *tmp;
146576364Salfred	int *nargs, *force;
146676364Salfred	off_t off, size, sectorsize, strip, disk_sectors;
1467189649Sjhb	intmax_t *sizearg, *striparg;
1468118764Ssilby	int numdisks, i, len, level, qual, update;
1469118764Ssilby	int error;
1470118764Ssilby
147176364Salfred	sc = md->mdo_softc;
147276364Salfred	mdi = (struct g_raid_md_intel_object *)md;
147376364Salfred	verb = gctl_get_param(req, "verb", NULL);
1474127501Salc	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
147576364Salfred	error = 0;
147676364Salfred	if (strcmp(verb, "label") == 0) {
147776364Salfred
147876364Salfred		if (*nargs < 4) {
147976364Salfred			gctl_error(req, "Invalid number of arguments.");
148076364Salfred			return (-1);
148176364Salfred		}
148213675Sdyson		volname = gctl_get_asciiparam(req, "arg1");
148313675Sdyson		if (volname == NULL) {
148413675Sdyson			gctl_error(req, "No volume name.");
148513675Sdyson			return (-2);
148613675Sdyson		}
148713675Sdyson		levelname = gctl_get_asciiparam(req, "arg2");
148813675Sdyson		if (levelname == NULL) {
1489125293Srwatson			gctl_error(req, "No RAID level.");
149013907Sdyson			return (-3);
149176364Salfred		}
1492125293Srwatson		if (strcasecmp(levelname, "RAID5") == 0)
149391968Salfred			levelname = "RAID5-LA";
1494125293Srwatson		if (g_raid_volume_str2level(levelname, &level, &qual)) {
1495133049Ssilby			gctl_error(req, "Unknown RAID level '%s'.", levelname);
1496125293Srwatson			return (-4);
149791968Salfred		}
149891968Salfred		numdisks = *nargs - 3;
149913907Sdyson		force = gctl_get_paraml(req, "force", sizeof(*force));
150091968Salfred		if (!g_raid_md_intel_supported(level, qual, numdisks,
150191968Salfred		    force ? *force : 0)) {
150291968Salfred			gctl_error(req, "Unsupported RAID level "
150391968Salfred			    "(0x%02x/0x%02x), or number of disks (%d).",
1504126131Sgreen			    level, qual, numdisks);
150591968Salfred			return (-5);
150691968Salfred		}
1507126131Sgreen
1508133049Ssilby		/* Search for disks, connect them and probe. */
150991968Salfred		size = 0x7fffffffffffffffllu;
1510133049Ssilby		sectorsize = 0;
151191968Salfred		for (i = 0; i < numdisks; i++) {
151213675Sdyson			snprintf(arg, sizeof(arg), "arg%d", i + 3);
1513101768Srwatson			diskname = gctl_get_asciiparam(req, arg);
151491968Salfred			if (diskname == NULL) {
1515125293Srwatson				gctl_error(req, "No disk name (%s).", arg);
151691968Salfred				error = -6;
1517125293Srwatson				break;
1518179243Skib			}
151991968Salfred			if (strcmp(diskname, "NONE") == 0) {
152013907Sdyson				cp = NULL;
152191968Salfred				pp = NULL;
152291968Salfred			} else {
1523133741Sjmg				g_topology_lock();
152491968Salfred				cp = g_raid_open_consumer(sc, diskname);
1525125293Srwatson				if (cp == NULL) {
152691968Salfred					gctl_error(req, "Can't open disk '%s'.",
1527125293Srwatson					    diskname);
1528125293Srwatson					g_topology_unlock();
1529125293Srwatson					error = -7;
1530125293Srwatson					break;
153191968Salfred				}
1532125293Srwatson				pp = cp->provider;
1533125293Srwatson			}
1534125293Srwatson			pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1535179243Skib			pd->pd_disk_pos = i;
1536126131Sgreen			disk = g_raid_create_disk(sc);
1537179243Skib			disk->d_md_data = (void *)pd;
1538179243Skib			disk->d_consumer = cp;
1539179243Skib			if (cp == NULL) {
1540179243Skib				strcpy(&pd->pd_disk_meta.serial[0], "NONE");
1541179243Skib				pd->pd_disk_meta.id = 0xffffffff;
1542179243Skib				pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
1543133741Sjmg				continue;
1544179243Skib			}
1545133741Sjmg			cp->private = disk;
1546125293Srwatson			g_topology_unlock();
1547125293Srwatson
1548125293Srwatson			error = g_raid_md_get_label(cp,
1549125293Srwatson			    &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
1550125293Srwatson			if (error != 0) {
1551179243Skib				gctl_error(req,
155291968Salfred				    "Can't get serial for provider '%s'.",
1553125293Srwatson				    diskname);
1554172930Srwatson				error = -8;
1555125293Srwatson				break;
1556125293Srwatson			}
1557125293Srwatson
1558125293Srwatson			/* Read kernel dumping information. */
155913675Sdyson			disk->d_kd.offset = 0;
156059288Sjlemon			disk->d_kd.length = OFF_MAX;
156172521Sjlemon			len = sizeof(disk->d_kd);
156259288Sjlemon			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
156372521Sjlemon			if (disk->d_kd.di.dumper == NULL)
156459288Sjlemon				G_RAID_DEBUG1(2, sc,
156589306Salfred				    "Dumping not supported by %s.",
156659288Sjlemon				    cp->provider->name);
1567109153Sdillon
1568126131Sgreen			intel_set_disk_sectors(&pd->pd_disk_meta,
156972521Sjlemon			    pp->mediasize / pp->sectorsize);
157072521Sjlemon			if (size > pp->mediasize)
157172521Sjlemon				size = pp->mediasize;
157272521Sjlemon			if (sectorsize < pp->sectorsize)
157372521Sjlemon				sectorsize = pp->sectorsize;
157472521Sjlemon			pd->pd_disk_meta.id = 0;
1575179243Skib			pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
1576101382Sdes		}
1577126131Sgreen		if (error != 0)
1578118929Sjmg			return (error);
1579126131Sgreen
1580126131Sgreen		if (sectorsize <= 0) {
158172521Sjlemon			gctl_error(req, "Can't get sector size.");
158272521Sjlemon			return (-8);
1583126131Sgreen		}
1584133741Sjmg
158572521Sjlemon		/* Reserve some space for metadata. */
158678292Sjlemon		size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1587133741Sjmg
158891372Salfred		/* Handle size argument. */
158959288Sjlemon		len = sizeof(*sizearg);
159059288Sjlemon		sizearg = gctl_get_param(req, "size", &len);
159159288Sjlemon		if (sizearg != NULL && len == sizeof(*sizearg) &&
159259288Sjlemon		    *sizearg > 0) {
159359288Sjlemon			if (*sizearg > size) {
159459288Sjlemon				gctl_error(req, "Size too big %lld > %lld.",
1595121018Sjmg				    (long long)*sizearg, (long long)size);
159659288Sjlemon				return (-9);
1597126131Sgreen			}
1598179242Skib			size = *sizearg;
1599121018Sjmg		}
1600133741Sjmg
160191372Salfred		/* Handle strip argument. */
160259288Sjlemon		strip = 131072;
160359288Sjlemon		len = sizeof(*striparg);
160459288Sjlemon		striparg = gctl_get_param(req, "strip", &len);
160559288Sjlemon		if (striparg != NULL && len == sizeof(*striparg) &&
160659288Sjlemon		    *striparg > 0) {
160759288Sjlemon			if (*striparg < sectorsize) {
1608109153Sdillon				gctl_error(req, "Strip size too small.");
160959288Sjlemon				return (-10);
1610133741Sjmg			}
161159288Sjlemon			if (*striparg % sectorsize != 0) {
161291372Salfred				gctl_error(req, "Incorrect strip size.");
161359288Sjlemon				return (-11);
161459288Sjlemon			}
161559288Sjlemon			if (strip > 65535 * sectorsize) {
161659288Sjlemon				gctl_error(req, "Strip size too big.");
161759288Sjlemon				return (-12);
1618179243Skib			}
1619179243Skib			strip = *striparg;
162091372Salfred		}
162191372Salfred
162259288Sjlemon		/* Round size down to strip or sector. */
162359288Sjlemon		if (level == G_RAID_VOLUME_RL_RAID1)
1624133741Sjmg			size -= (size % sectorsize);
162591372Salfred		else if (level == G_RAID_VOLUME_RL_RAID1E &&
1626133741Sjmg		    (numdisks & 1) != 0)
162759288Sjlemon			size -= (size % (2 * strip));
162859288Sjlemon		else
162959288Sjlemon			size -= (size % strip);
163059288Sjlemon		if (size <= 0) {
163159288Sjlemon			gctl_error(req, "Size too small.");
163259288Sjlemon			return (-13);
1633109153Sdillon		}
163459288Sjlemon
163559288Sjlemon		/* We have all we need, create things: volume, ... */
163691372Salfred		mdi->mdio_started = 1;
1637179243Skib		vol = g_raid_create_volume(sc, volname, -1);
1638179243Skib		vol->v_md_data = (void *)(intptr_t)0;
163959288Sjlemon		vol->v_raid_level = level;
1640124394Sdes		vol->v_raid_level_qualifier = qual;
164191372Salfred		vol->v_strip_size = strip;
164259288Sjlemon		vol->v_disks_count = numdisks;
164359288Sjlemon		if (level == G_RAID_VOLUME_RL_RAID0)
164459288Sjlemon			vol->v_mediasize = size * numdisks;
164565855Sjlemon		else if (level == G_RAID_VOLUME_RL_RAID1)
164659288Sjlemon			vol->v_mediasize = size;
164759288Sjlemon		else if (level == G_RAID_VOLUME_RL_RAID5)
164891372Salfred			vol->v_mediasize = size * (numdisks - 1);
164959288Sjlemon		else { /* RAID1E */
165059288Sjlemon			vol->v_mediasize = ((size * numdisks) / strip / 2) *
1651			    strip;
1652		}
1653		vol->v_sectorsize = sectorsize;
1654		g_raid_start_volume(vol);
1655
1656		/* , and subdisks. */
1657		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1658			pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1659			sd = &vol->v_subdisks[pd->pd_disk_pos];
1660			sd->sd_disk = disk;
1661			sd->sd_offset = 0;
1662			sd->sd_size = size;
1663			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1664			if (sd->sd_disk->d_consumer != NULL) {
1665				g_raid_change_disk_state(disk,
1666				    G_RAID_DISK_S_ACTIVE);
1667				if (level == G_RAID_VOLUME_RL_RAID5)
1668					g_raid_change_subdisk_state(sd,
1669					    G_RAID_SUBDISK_S_UNINITIALIZED);
1670				else
1671					g_raid_change_subdisk_state(sd,
1672					    G_RAID_SUBDISK_S_ACTIVE);
1673				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1674				    G_RAID_EVENT_SUBDISK);
1675			} else {
1676				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1677			}
1678		}
1679
1680		/* Write metadata based on created entities. */
1681		G_RAID_DEBUG1(0, sc, "Array started.");
1682		g_raid_md_write_intel(md, NULL, NULL, NULL);
1683
1684		/* Pickup any STALE/SPARE disks to refill array if needed. */
1685		g_raid_md_intel_refill(sc);
1686
1687		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1688		    G_RAID_EVENT_VOLUME);
1689		return (0);
1690	}
1691	if (strcmp(verb, "add") == 0) {
1692
1693		if (*nargs != 3) {
1694			gctl_error(req, "Invalid number of arguments.");
1695			return (-1);
1696		}
1697		volname = gctl_get_asciiparam(req, "arg1");
1698		if (volname == NULL) {
1699			gctl_error(req, "No volume name.");
1700			return (-2);
1701		}
1702		levelname = gctl_get_asciiparam(req, "arg2");
1703		if (levelname == NULL) {
1704			gctl_error(req, "No RAID level.");
1705			return (-3);
1706		}
1707		if (strcasecmp(levelname, "RAID5") == 0)
1708			levelname = "RAID5-LA";
1709		if (g_raid_volume_str2level(levelname, &level, &qual)) {
1710			gctl_error(req, "Unknown RAID level '%s'.", levelname);
1711			return (-4);
1712		}
1713
1714		/* Look for existing volumes. */
1715		i = 0;
1716		vol1 = NULL;
1717		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1718			vol1 = vol;
1719			i++;
1720		}
1721		if (i > 1) {
1722			gctl_error(req, "Maximum two volumes supported.");
1723			return (-6);
1724		}
1725		if (vol1 == NULL) {
1726			gctl_error(req, "At least one volume must exist.");
1727			return (-7);
1728		}
1729
1730		numdisks = vol1->v_disks_count;
1731		force = gctl_get_paraml(req, "force", sizeof(*force));
1732		if (!g_raid_md_intel_supported(level, qual, numdisks,
1733		    force ? *force : 0)) {
1734			gctl_error(req, "Unsupported RAID level "
1735			    "(0x%02x/0x%02x), or number of disks (%d).",
1736			    level, qual, numdisks);
1737			return (-5);
1738		}
1739
1740		/* Collect info about present disks. */
1741		size = 0x7fffffffffffffffllu;
1742		sectorsize = 512;
1743		for (i = 0; i < numdisks; i++) {
1744			disk = vol1->v_subdisks[i].sd_disk;
1745			pd = (struct g_raid_md_intel_perdisk *)
1746			    disk->d_md_data;
1747			disk_sectors =
1748			    intel_get_disk_sectors(&pd->pd_disk_meta);
1749
1750			if (disk_sectors * 512 < size)
1751				size = disk_sectors * 512;
1752			if (disk->d_consumer != NULL &&
1753			    disk->d_consumer->provider != NULL &&
1754			    disk->d_consumer->provider->sectorsize >
1755			     sectorsize) {
1756				sectorsize =
1757				    disk->d_consumer->provider->sectorsize;
1758			}
1759		}
1760
1761		/* Reserve some space for metadata. */
1762		size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1763
1764		/* Decide insert before or after. */
1765		sd = &vol1->v_subdisks[0];
1766		if (sd->sd_offset >
1767		    size - (sd->sd_offset + sd->sd_size)) {
1768			off = 0;
1769			size = sd->sd_offset;
1770		} else {
1771			off = sd->sd_offset + sd->sd_size;
1772			size = size - (sd->sd_offset + sd->sd_size);
1773		}
1774
1775		/* Handle strip argument. */
1776		strip = 131072;
1777		len = sizeof(*striparg);
1778		striparg = gctl_get_param(req, "strip", &len);
1779		if (striparg != NULL && len == sizeof(*striparg) &&
1780		    *striparg > 0) {
1781			if (*striparg < sectorsize) {
1782				gctl_error(req, "Strip size too small.");
1783				return (-10);
1784			}
1785			if (*striparg % sectorsize != 0) {
1786				gctl_error(req, "Incorrect strip size.");
1787				return (-11);
1788			}
1789			if (strip > 65535 * sectorsize) {
1790				gctl_error(req, "Strip size too big.");
1791				return (-12);
1792			}
1793			strip = *striparg;
1794		}
1795
1796		/* Round offset up to strip. */
1797		if (off % strip != 0) {
1798			size -= strip - off % strip;
1799			off += strip - off % strip;
1800		}
1801
1802		/* Handle size argument. */
1803		len = sizeof(*sizearg);
1804		sizearg = gctl_get_param(req, "size", &len);
1805		if (sizearg != NULL && len == sizeof(*sizearg) &&
1806		    *sizearg > 0) {
1807			if (*sizearg > size) {
1808				gctl_error(req, "Size too big %lld > %lld.",
1809				    (long long)*sizearg, (long long)size);
1810				return (-9);
1811			}
1812			size = *sizearg;
1813		}
1814
1815		/* Round size down to strip or sector. */
1816		if (level == G_RAID_VOLUME_RL_RAID1)
1817			size -= (size % sectorsize);
1818		else
1819			size -= (size % strip);
1820		if (size <= 0) {
1821			gctl_error(req, "Size too small.");
1822			return (-13);
1823		}
1824		if (size > 0xffffffffllu * sectorsize) {
1825			gctl_error(req, "Size too big.");
1826			return (-14);
1827		}
1828
1829		/* We have all we need, create things: volume, ... */
1830		vol = g_raid_create_volume(sc, volname, -1);
1831		vol->v_md_data = (void *)(intptr_t)i;
1832		vol->v_raid_level = level;
1833		vol->v_raid_level_qualifier = qual;
1834		vol->v_strip_size = strip;
1835		vol->v_disks_count = numdisks;
1836		if (level == G_RAID_VOLUME_RL_RAID0)
1837			vol->v_mediasize = size * numdisks;
1838		else if (level == G_RAID_VOLUME_RL_RAID1)
1839			vol->v_mediasize = size;
1840		else if (level == G_RAID_VOLUME_RL_RAID5)
1841			vol->v_mediasize = size * (numdisks - 1);
1842		else { /* RAID1E */
1843			vol->v_mediasize = ((size * numdisks) / strip / 2) *
1844			    strip;
1845		}
1846		vol->v_sectorsize = sectorsize;
1847		g_raid_start_volume(vol);
1848
1849		/* , and subdisks. */
1850		for (i = 0; i < numdisks; i++) {
1851			disk = vol1->v_subdisks[i].sd_disk;
1852			sd = &vol->v_subdisks[i];
1853			sd->sd_disk = disk;
1854			sd->sd_offset = off;
1855			sd->sd_size = size;
1856			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1857			if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
1858				if (level == G_RAID_VOLUME_RL_RAID5)
1859					g_raid_change_subdisk_state(sd,
1860					    G_RAID_SUBDISK_S_UNINITIALIZED);
1861				else
1862					g_raid_change_subdisk_state(sd,
1863					    G_RAID_SUBDISK_S_ACTIVE);
1864				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1865				    G_RAID_EVENT_SUBDISK);
1866			}
1867		}
1868
1869		/* Write metadata based on created entities. */
1870		g_raid_md_write_intel(md, NULL, NULL, NULL);
1871
1872		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1873		    G_RAID_EVENT_VOLUME);
1874		return (0);
1875	}
1876	if (strcmp(verb, "delete") == 0) {
1877
1878		/* Full node destruction. */
1879		if (*nargs == 1) {
1880			/* Check if some volume is still open. */
1881			force = gctl_get_paraml(req, "force", sizeof(*force));
1882			if (force != NULL && *force == 0 &&
1883			    g_raid_nopens(sc) != 0) {
1884				gctl_error(req, "Some volume is still open.");
1885				return (-4);
1886			}
1887
1888			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1889				if (disk->d_consumer)
1890					intel_meta_erase(disk->d_consumer);
1891			}
1892			g_raid_destroy_node(sc, 0);
1893			return (0);
1894		}
1895
1896		/* Destroy specified volume. If it was last - all node. */
1897		if (*nargs != 2) {
1898			gctl_error(req, "Invalid number of arguments.");
1899			return (-1);
1900		}
1901		volname = gctl_get_asciiparam(req, "arg1");
1902		if (volname == NULL) {
1903			gctl_error(req, "No volume name.");
1904			return (-2);
1905		}
1906
1907		/* Search for volume. */
1908		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1909			if (strcmp(vol->v_name, volname) == 0)
1910				break;
1911		}
1912		if (vol == NULL) {
1913			i = strtol(volname, &tmp, 10);
1914			if (verb != volname && tmp[0] == 0) {
1915				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1916					if (vol->v_global_id == i)
1917						break;
1918				}
1919			}
1920		}
1921		if (vol == NULL) {
1922			gctl_error(req, "Volume '%s' not found.", volname);
1923			return (-3);
1924		}
1925
1926		/* Check if volume is still open. */
1927		force = gctl_get_paraml(req, "force", sizeof(*force));
1928		if (force != NULL && *force == 0 &&
1929		    vol->v_provider_open != 0) {
1930			gctl_error(req, "Volume is still open.");
1931			return (-4);
1932		}
1933
1934		/* Destroy volume and potentially node. */
1935		i = 0;
1936		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
1937			i++;
1938		if (i >= 2) {
1939			g_raid_destroy_volume(vol);
1940			g_raid_md_write_intel(md, NULL, NULL, NULL);
1941		} else {
1942			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1943				if (disk->d_consumer)
1944					intel_meta_erase(disk->d_consumer);
1945			}
1946			g_raid_destroy_node(sc, 0);
1947		}
1948		return (0);
1949	}
1950	if (strcmp(verb, "remove") == 0 ||
1951	    strcmp(verb, "fail") == 0) {
1952		if (*nargs < 2) {
1953			gctl_error(req, "Invalid number of arguments.");
1954			return (-1);
1955		}
1956		for (i = 1; i < *nargs; i++) {
1957			snprintf(arg, sizeof(arg), "arg%d", i);
1958			diskname = gctl_get_asciiparam(req, arg);
1959			if (diskname == NULL) {
1960				gctl_error(req, "No disk name (%s).", arg);
1961				error = -2;
1962				break;
1963			}
1964			if (strncmp(diskname, "/dev/", 5) == 0)
1965				diskname += 5;
1966
1967			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1968				if (disk->d_consumer != NULL &&
1969				    disk->d_consumer->provider != NULL &&
1970				    strcmp(disk->d_consumer->provider->name,
1971				     diskname) == 0)
1972					break;
1973			}
1974			if (disk == NULL) {
1975				gctl_error(req, "Disk '%s' not found.",
1976				    diskname);
1977				error = -3;
1978				break;
1979			}
1980
1981			if (strcmp(verb, "fail") == 0) {
1982				g_raid_md_fail_disk_intel(md, NULL, disk);
1983				continue;
1984			}
1985
1986			pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1987
1988			/* Erase metadata on deleting disk. */
1989			intel_meta_erase(disk->d_consumer);
1990
1991			/* If disk was assigned, just update statuses. */
1992			if (pd->pd_disk_pos >= 0) {
1993				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1994				g_raid_kill_consumer(sc, disk->d_consumer);
1995				disk->d_consumer = NULL;
1996				TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1997					g_raid_change_subdisk_state(sd,
1998					    G_RAID_SUBDISK_S_NONE);
1999					g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
2000					    G_RAID_EVENT_SUBDISK);
2001				}
2002			} else {
2003				/* Otherwise -- delete. */
2004				g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
2005				g_raid_destroy_disk(disk);
2006			}
2007		}
2008
2009		/* Write updated metadata to remaining disks. */
2010		g_raid_md_write_intel(md, NULL, NULL, NULL);
2011
2012		/* Check if anything left except placeholders. */
2013		if (g_raid_ndisks(sc, -1) ==
2014		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2015			g_raid_destroy_node(sc, 0);
2016		else
2017			g_raid_md_intel_refill(sc);
2018		return (error);
2019	}
2020	if (strcmp(verb, "insert") == 0) {
2021		if (*nargs < 2) {
2022			gctl_error(req, "Invalid number of arguments.");
2023			return (-1);
2024		}
2025		update = 0;
2026		for (i = 1; i < *nargs; i++) {
2027			/* Get disk name. */
2028			snprintf(arg, sizeof(arg), "arg%d", i);
2029			diskname = gctl_get_asciiparam(req, arg);
2030			if (diskname == NULL) {
2031				gctl_error(req, "No disk name (%s).", arg);
2032				error = -3;
2033				break;
2034			}
2035
2036			/* Try to find provider with specified name. */
2037			g_topology_lock();
2038			cp = g_raid_open_consumer(sc, diskname);
2039			if (cp == NULL) {
2040				gctl_error(req, "Can't open disk '%s'.",
2041				    diskname);
2042				g_topology_unlock();
2043				error = -4;
2044				break;
2045			}
2046			pp = cp->provider;
2047			g_topology_unlock();
2048
2049			/* Read disk serial. */
2050			error = g_raid_md_get_label(cp,
2051			    &serial[0], INTEL_SERIAL_LEN);
2052			if (error != 0) {
2053				gctl_error(req,
2054				    "Can't get serial for provider '%s'.",
2055				    diskname);
2056				g_raid_kill_consumer(sc, cp);
2057				error = -7;
2058				break;
2059			}
2060
2061			pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
2062			pd->pd_disk_pos = -1;
2063
2064			disk = g_raid_create_disk(sc);
2065			disk->d_consumer = cp;
2066			disk->d_md_data = (void *)pd;
2067			cp->private = disk;
2068
2069			/* Read kernel dumping information. */
2070			disk->d_kd.offset = 0;
2071			disk->d_kd.length = OFF_MAX;
2072			len = sizeof(disk->d_kd);
2073			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
2074			if (disk->d_kd.di.dumper == NULL)
2075				G_RAID_DEBUG1(2, sc,
2076				    "Dumping not supported by %s.",
2077				    cp->provider->name);
2078
2079			memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
2080			    INTEL_SERIAL_LEN);
2081			intel_set_disk_sectors(&pd->pd_disk_meta,
2082			    pp->mediasize / pp->sectorsize);
2083			pd->pd_disk_meta.id = 0;
2084			pd->pd_disk_meta.flags = INTEL_F_SPARE;
2085
2086			/* Welcome the "new" disk. */
2087			update += g_raid_md_intel_start_disk(disk);
2088			if (disk->d_state == G_RAID_DISK_S_SPARE) {
2089				intel_meta_write_spare(cp, &pd->pd_disk_meta);
2090				g_raid_destroy_disk(disk);
2091			} else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2092				gctl_error(req, "Disk '%s' doesn't fit.",
2093				    diskname);
2094				g_raid_destroy_disk(disk);
2095				error = -8;
2096				break;
2097			}
2098		}
2099
2100		/* Write new metadata if we changed something. */
2101		if (update)
2102			g_raid_md_write_intel(md, NULL, NULL, NULL);
2103		return (error);
2104	}
2105	return (-100);
2106}
2107
2108static int
2109g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2110    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2111{
2112	struct g_raid_softc *sc;
2113	struct g_raid_volume *vol;
2114	struct g_raid_subdisk *sd;
2115	struct g_raid_disk *disk;
2116	struct g_raid_md_intel_object *mdi;
2117	struct g_raid_md_intel_perdisk *pd;
2118	struct intel_raid_conf *meta;
2119	struct intel_raid_vol *mvol;
2120	struct intel_raid_map *mmap0, *mmap1;
2121	off_t sectorsize = 512, pos;
2122	const char *version, *cv;
2123	int vi, sdi, numdisks, len, state, stale;
2124
2125	sc = md->mdo_softc;
2126	mdi = (struct g_raid_md_intel_object *)md;
2127
2128	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2129		return (0);
2130
2131	/* Bump generation. Newly written metadata may differ from previous. */
2132	mdi->mdio_generation++;
2133
2134	/* Count number of disks. */
2135	numdisks = 0;
2136	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2137		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2138		if (pd->pd_disk_pos < 0)
2139			continue;
2140		numdisks++;
2141		if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
2142			pd->pd_disk_meta.flags =
2143			    INTEL_F_ONLINE | INTEL_F_ASSIGNED;
2144		} else if (disk->d_state == G_RAID_DISK_S_FAILED) {
2145			pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
2146		} else {
2147			pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
2148			if (pd->pd_disk_meta.id != 0xffffffff) {
2149				pd->pd_disk_meta.id = 0xffffffff;
2150				len = strlen(pd->pd_disk_meta.serial);
2151				len = min(len, INTEL_SERIAL_LEN - 3);
2152				strcpy(pd->pd_disk_meta.serial + len, ":0");
2153			}
2154		}
2155	}
2156
2157	/* Fill anchor and disks. */
2158	meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
2159	    M_MD_INTEL, M_WAITOK | M_ZERO);
2160	memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
2161	meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
2162	meta->config_id = mdi->mdio_config_id;
2163	meta->generation = mdi->mdio_generation;
2164	meta->attributes = INTEL_ATTR_CHECKSUM;
2165	meta->total_disks = numdisks;
2166	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2167		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2168		if (pd->pd_disk_pos < 0)
2169			continue;
2170		meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
2171	}
2172
2173	/* Fill volumes and maps. */
2174	vi = 0;
2175	version = INTEL_VERSION_1000;
2176	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2177		if (vol->v_stopping)
2178			continue;
2179		mvol = intel_get_volume(meta, vi);
2180
2181		/* New metadata may have different volumes order. */
2182		vol->v_md_data = (void *)(intptr_t)vi;
2183
2184		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2185			sd = &vol->v_subdisks[sdi];
2186			if (sd->sd_disk != NULL)
2187				break;
2188		}
2189		if (sdi >= vol->v_disks_count)
2190			panic("No any filled subdisk in volume");
2191		if (vol->v_mediasize >= 0x20000000000llu)
2192			meta->attributes |= INTEL_ATTR_2TB;
2193		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2194			meta->attributes |= INTEL_ATTR_RAID0;
2195		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2196			meta->attributes |= INTEL_ATTR_RAID1;
2197		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2198			meta->attributes |= INTEL_ATTR_RAID5;
2199		else
2200			meta->attributes |= INTEL_ATTR_RAID10;
2201
2202		if (meta->attributes & INTEL_ATTR_2TB)
2203			cv = INTEL_VERSION_1300;
2204//		else if (dev->status == DEV_CLONE_N_GO)
2205//			cv = INTEL_VERSION_1206;
2206		else if (vol->v_disks_count > 4)
2207			cv = INTEL_VERSION_1204;
2208		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2209			cv = INTEL_VERSION_1202;
2210		else if (vol->v_disks_count > 2)
2211			cv = INTEL_VERSION_1201;
2212		else if (vi > 0)
2213			cv = INTEL_VERSION_1200;
2214		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2215			cv = INTEL_VERSION_1100;
2216		else
2217			cv = INTEL_VERSION_1000;
2218		if (strcmp(cv, version) > 0)
2219			version = cv;
2220
2221		strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
2222		mvol->total_sectors = vol->v_mediasize / sectorsize;
2223
2224		/* Check for any recovery in progress. */
2225		state = G_RAID_SUBDISK_S_ACTIVE;
2226		pos = 0x7fffffffffffffffllu;
2227		stale = 0;
2228		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2229			sd = &vol->v_subdisks[sdi];
2230			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
2231				state = G_RAID_SUBDISK_S_REBUILD;
2232			else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
2233			    state != G_RAID_SUBDISK_S_REBUILD)
2234				state = G_RAID_SUBDISK_S_RESYNC;
2235			else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
2236				stale = 1;
2237			if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2238			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
2239			     sd->sd_rebuild_pos < pos)
2240			        pos = sd->sd_rebuild_pos;
2241		}
2242		if (state == G_RAID_SUBDISK_S_REBUILD) {
2243			mvol->migr_state = 1;
2244			mvol->migr_type = INTEL_MT_REBUILD;
2245		} else if (state == G_RAID_SUBDISK_S_RESYNC) {
2246			mvol->migr_state = 1;
2247			/* mvol->migr_type = INTEL_MT_REPAIR; */
2248			mvol->migr_type = INTEL_MT_VERIFY;
2249			mvol->state |= INTEL_ST_VERIFY_AND_FIX;
2250		} else
2251			mvol->migr_state = 0;
2252		mvol->dirty = (vol->v_dirty || stale);
2253
2254		mmap0 = intel_get_map(mvol, 0);
2255
2256		/* Write map / common part of two maps. */
2257		intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
2258		intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
2259		mmap0->strip_sectors = vol->v_strip_size / sectorsize;
2260		if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
2261			mmap0->status = INTEL_S_FAILURE;
2262		else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
2263			mmap0->status = INTEL_S_DEGRADED;
2264		else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED)
2265		    == g_raid_nsubdisks(vol, -1))
2266			mmap0->status = INTEL_S_UNINITIALIZED;
2267		else
2268			mmap0->status = INTEL_S_READY;
2269		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2270			mmap0->type = INTEL_T_RAID0;
2271		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
2272		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2273			mmap0->type = INTEL_T_RAID1;
2274		else
2275			mmap0->type = INTEL_T_RAID5;
2276		mmap0->total_disks = vol->v_disks_count;
2277		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2278			mmap0->total_domains = vol->v_disks_count;
2279		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2280			mmap0->total_domains = 2;
2281		else
2282			mmap0->total_domains = 1;
2283		intel_set_map_stripe_count(mmap0,
2284		    sd->sd_size / vol->v_strip_size / mmap0->total_domains);
2285		mmap0->failed_disk_num = 0xff;
2286		mmap0->ddf = 1;
2287
2288		/* If there are two maps - copy common and update. */
2289		if (mvol->migr_state) {
2290			intel_set_vol_curr_migr_unit(mvol,
2291			    pos / vol->v_strip_size / mmap0->total_domains);
2292			mmap1 = intel_get_map(mvol, 1);
2293			memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
2294			mmap0->status = INTEL_S_READY;
2295		} else
2296			mmap1 = NULL;
2297
2298		/* Write disk indexes and put rebuild flags. */
2299		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2300			sd = &vol->v_subdisks[sdi];
2301			pd = (struct g_raid_md_intel_perdisk *)
2302			    sd->sd_disk->d_md_data;
2303			mmap0->disk_idx[sdi] = pd->pd_disk_pos;
2304			if (mvol->migr_state)
2305				mmap1->disk_idx[sdi] = pd->pd_disk_pos;
2306			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2307			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2308				mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2309			} else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
2310			    sd->sd_state != G_RAID_SUBDISK_S_STALE &&
2311			    sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) {
2312				mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
2313				if (mvol->migr_state)
2314					mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2315			}
2316			if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
2317			     sd->sd_state == G_RAID_SUBDISK_S_FAILED) &&
2318			    mmap0->failed_disk_num == 0xff) {
2319				mmap0->failed_disk_num = sdi;
2320				if (mvol->migr_state)
2321					mmap1->failed_disk_num = sdi;
2322			}
2323		}
2324		vi++;
2325	}
2326	meta->total_volumes = vi;
2327	if (strcmp(version, INTEL_VERSION_1300) != 0)
2328		meta->attributes &= INTEL_ATTR_CHECKSUM;
2329	memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
2330
2331	/* We are done. Print meta data and store them to disks. */
2332	g_raid_md_intel_print(meta);
2333	if (mdi->mdio_meta != NULL)
2334		free(mdi->mdio_meta, M_MD_INTEL);
2335	mdi->mdio_meta = meta;
2336	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2337		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2338		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
2339			continue;
2340		if (pd->pd_meta != NULL) {
2341			free(pd->pd_meta, M_MD_INTEL);
2342			pd->pd_meta = NULL;
2343		}
2344		pd->pd_meta = intel_meta_copy(meta);
2345		intel_meta_write(disk->d_consumer, meta);
2346	}
2347	return (0);
2348}
2349
2350static int
2351g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
2352    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2353{
2354	struct g_raid_softc *sc;
2355	struct g_raid_md_intel_object *mdi;
2356	struct g_raid_md_intel_perdisk *pd;
2357	struct g_raid_subdisk *sd;
2358
2359	sc = md->mdo_softc;
2360	mdi = (struct g_raid_md_intel_object *)md;
2361	pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
2362
2363	/* We can't fail disk that is not a part of array now. */
2364	if (pd->pd_disk_pos < 0)
2365		return (-1);
2366
2367	/*
2368	 * Mark disk as failed in metadata and try to write that metadata
2369	 * to the disk itself to prevent it's later resurrection as STALE.
2370	 */
2371	mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
2372	pd->pd_disk_meta.flags = INTEL_F_FAILED;
2373	g_raid_md_intel_print(mdi->mdio_meta);
2374	if (tdisk->d_consumer)
2375		intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
2376
2377	/* Change states. */
2378	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
2379	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
2380		g_raid_change_subdisk_state(sd,
2381		    G_RAID_SUBDISK_S_FAILED);
2382		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
2383		    G_RAID_EVENT_SUBDISK);
2384	}
2385
2386	/* Write updated metadata to remaining disks. */
2387	g_raid_md_write_intel(md, NULL, NULL, tdisk);
2388
2389	/* Check if anything left except placeholders. */
2390	if (g_raid_ndisks(sc, -1) ==
2391	    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2392		g_raid_destroy_node(sc, 0);
2393	else
2394		g_raid_md_intel_refill(sc);
2395	return (0);
2396}
2397
2398static int
2399g_raid_md_free_disk_intel(struct g_raid_md_object *md,
2400    struct g_raid_disk *disk)
2401{
2402	struct g_raid_md_intel_perdisk *pd;
2403
2404	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2405	if (pd->pd_meta != NULL) {
2406		free(pd->pd_meta, M_MD_INTEL);
2407		pd->pd_meta = NULL;
2408	}
2409	free(pd, M_MD_INTEL);
2410	disk->d_md_data = NULL;
2411	return (0);
2412}
2413
2414static int
2415g_raid_md_free_intel(struct g_raid_md_object *md)
2416{
2417	struct g_raid_md_intel_object *mdi;
2418
2419	mdi = (struct g_raid_md_intel_object *)md;
2420	if (!mdi->mdio_started) {
2421		mdi->mdio_started = 0;
2422		callout_stop(&mdi->mdio_start_co);
2423		G_RAID_DEBUG1(1, md->mdo_softc,
2424		    "root_mount_rel %p", mdi->mdio_rootmount);
2425		root_mount_rel(mdi->mdio_rootmount);
2426		mdi->mdio_rootmount = NULL;
2427	}
2428	if (mdi->mdio_meta != NULL) {
2429		free(mdi->mdio_meta, M_MD_INTEL);
2430		mdi->mdio_meta = NULL;
2431	}
2432	return (0);
2433}
2434
2435G_RAID_MD_DECLARE(g_raid_md_intel);
2436