md_intel.c revision 235092
1/*-
2 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
3 * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/9/sys/geom/raid/md_intel.c 235092 2012-05-06 15:55:01Z mav $");
30
31#include <sys/param.h>
32#include <sys/bio.h>
33#include <sys/endian.h>
34#include <sys/kernel.h>
35#include <sys/kobj.h>
36#include <sys/limits.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mutex.h>
40#include <sys/systm.h>
41#include <sys/taskqueue.h>
42#include <geom/geom.h>
43#include "geom/raid/g_raid.h"
44#include "g_raid_md_if.h"
45
46static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
47
/* Values of intel_raid_map.status. */
#define INTEL_S_READY           0x00
#define INTEL_S_UNINITIALIZED   0x01
#define INTEL_S_DEGRADED        0x02
#define INTEL_S_FAILURE         0x03

/* Values of intel_raid_map.type. */
#define INTEL_T_RAID0           0x00
#define INTEL_T_RAID1           0x01
#define INTEL_T_RAID5           0x05

/* Layout of one intel_raid_map.disk_idx[] entry. */
#define INTEL_DI_IDX	0x00ffffff
#define INTEL_DI_RBLD	0x01000000

/*
 * On-disk map of a volume.  The 64-bit quantities are stored as separate
 * low and high 32-bit halves (xxx / xxx_hi); use the intel_get_map_*() and
 * intel_set_map_*() accessors rather than touching the halves directly.
 * The structure is variable length: disk_idx[] really holds total_disks
 * entries, and whatever follows the map starts right after the last one.
 */
struct intel_raid_map {
	uint32_t	offset;			/* Low half of the offset. */
	uint32_t	disk_sectors;		/* Low half of disk_sectors. */
	uint32_t	stripe_count;		/* Low half of stripe_count. */
	uint16_t	strip_sectors;
	uint8_t		status;			/* INTEL_S_*. */
	uint8_t		type;			/* INTEL_T_*. */
	uint8_t		total_disks;		/* Entries in disk_idx[]. */
	uint8_t		total_domains;
	uint8_t		failed_disk_num;
	uint8_t		ddf;
	uint32_t	offset_hi;		/* High half of the offset. */
	uint32_t	disk_sectors_hi;	/* High half of disk_sectors. */
	uint32_t	stripe_count_hi;	/* High half of stripe_count. */
	uint32_t	filler_2[4];
	uint32_t	disk_idx[1];		/* total_disks entries. */
} __packed;
76
77struct intel_raid_vol {
78	uint8_t		name[16];
79	u_int64_t	total_sectors __packed;
80	uint32_t	state;
81#define INTEL_ST_BOOTABLE		0x00000001
82#define INTEL_ST_BOOT_DEVICE		0x00000002
83#define INTEL_ST_READ_COALESCING	0x00000004
84#define INTEL_ST_WRITE_COALESCING	0x00000008
85#define INTEL_ST_LAST_SHUTDOWN_DIRTY	0x00000010
86#define INTEL_ST_HIDDEN_AT_BOOT		0x00000020
87#define INTEL_ST_CURRENTLY_HIDDEN	0x00000040
88#define INTEL_ST_VERIFY_AND_FIX		0x00000080
89#define INTEL_ST_MAP_STATE_UNINIT	0x00000100
90#define INTEL_ST_NO_AUTO_RECOVERY	0x00000200
91#define INTEL_ST_CLONE_N_GO		0x00000400
92#define INTEL_ST_CLONE_MAN_SYNC		0x00000800
93#define INTEL_ST_CNG_MASTER_DISK_NUM	0x00001000
94	uint32_t	reserved;
95	uint8_t		migr_priority;
96	uint8_t		num_sub_vols;
97	uint8_t		tid;
98	uint8_t		cng_master_disk;
99	uint16_t	cache_policy;
100	uint8_t		cng_state;
101	uint8_t		cng_sub_state;
102	uint32_t	filler_0[10];
103
104	uint32_t	curr_migr_unit;
105	uint32_t	checkpoint_id;
106	uint8_t		migr_state;
107	uint8_t		migr_type;
108#define INTEL_MT_INIT		0
109#define INTEL_MT_REBUILD	1
110#define INTEL_MT_VERIFY		2
111#define INTEL_MT_GEN_MIGR	3
112#define INTEL_MT_STATE_CHANGE	4
113#define INTEL_MT_REPAIR		5
114	uint8_t		dirty;
115	uint8_t		fs_state;
116	uint16_t	verify_errors;
117	uint16_t	bad_blocks;
118	uint32_t	curr_migr_unit_hi;
119	uint32_t	filler_1[3];
120	struct intel_raid_map map[1];	/* 2 entries if migr_state != 0. */
121} __packed;
122
#define INTEL_SERIAL_LEN	16

/* Bits of intel_raid_disk.flags. */
#define INTEL_F_SPARE		0x01
#define INTEL_F_ASSIGNED	0x02
#define INTEL_F_FAILED		0x04
#define INTEL_F_ONLINE		0x08

/*
 * On-disk record of one member disk.  Disks are identified by serial
 * (compared over INTEL_SERIAL_LEN bytes); the size is split into
 * sectors / sectors_hi, see intel_get_disk_sectors().
 */
struct intel_raid_disk {
	uint8_t		serial[INTEL_SERIAL_LEN];
	uint32_t	sectors;		/* Low half of the size. */
	uint32_t	id;
	uint32_t	flags;			/* INTEL_F_* bits. */
	uint32_t	owner_cfg_num;
	uint32_t	sectors_hi;		/* High half of the size. */
	uint32_t	filler[3];
} __packed;
137
138struct intel_raid_conf {
139	uint8_t		intel_id[24];
140#define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
141
142	uint8_t		version[6];
143#define INTEL_VERSION_1000	"1.0.00"	/* RAID0 */
144#define INTEL_VERSION_1100	"1.1.00"	/* RAID1 */
145#define INTEL_VERSION_1200	"1.2.00"	/* Many volumes */
146#define INTEL_VERSION_1201	"1.2.01"	/* 3 or 4 disks */
147#define INTEL_VERSION_1202	"1.2.02"	/* RAID5 */
148#define INTEL_VERSION_1204	"1.2.04"	/* 5 or 6 disks */
149#define INTEL_VERSION_1206	"1.2.06"	/* CNG */
150#define INTEL_VERSION_1300	"1.3.00"	/* Attributes */
151
152	uint8_t		dummy_0[2];
153	uint32_t	checksum;
154	uint32_t	config_size;
155	uint32_t	config_id;
156	uint32_t	generation;
157	uint32_t	error_log_size;
158	uint32_t	attributes;
159#define INTEL_ATTR_RAID0	0x00000001
160#define INTEL_ATTR_RAID1	0x00000002
161#define INTEL_ATTR_RAID10	0x00000004
162#define INTEL_ATTR_RAID1E	0x00000008
163#define INTEL_ATTR_RAID5	0x00000010
164#define INTEL_ATTR_RAIDCNG	0x00000020
165#define INTEL_ATTR_2TB		0x20000000
166#define INTEL_ATTR_PM		0x40000000
167#define INTEL_ATTR_CHECKSUM	0x80000000
168
169	uint8_t		total_disks;
170	uint8_t		total_volumes;
171	uint8_t		dummy_2[2];
172	uint32_t	filler_0[39];
173	struct intel_raid_disk	disk[1];	/* total_disks entries. */
174	/* Here goes total_volumes of struct intel_raid_vol. */
175} __packed;
176
/*
 * Upper bound, in bytes, of the metadata for an array of ndisks disks:
 * the configuration head (which already holds one disk record), the
 * remaining disk records, two volumes (each already holding one map with
 * one disk index), two more maps for volumes under migration, and the
 * remaining disk indexes of all four maps.
 *
 * The argument is parenthesized so that any expression can be passed.
 */
#define INTEL_MAX_MD_SIZE(ndisks)				\
    (sizeof(struct intel_raid_conf) +				\
     sizeof(struct intel_raid_disk) * ((ndisks) - 1) +		\
     sizeof(struct intel_raid_vol) * 2 +			\
     sizeof(struct intel_raid_map) * 2 +			\
     sizeof(uint32_t) * ((ndisks) - 1) * 4)
183
184struct g_raid_md_intel_perdisk {
185	struct intel_raid_conf	*pd_meta;
186	int			 pd_disk_pos;
187	struct intel_raid_disk	 pd_disk_meta;
188};
189
190struct g_raid_md_intel_object {
191	struct g_raid_md_object	 mdio_base;
192	uint32_t		 mdio_config_id;
193	uint32_t		 mdio_generation;
194	struct intel_raid_conf	*mdio_meta;
195	struct callout		 mdio_start_co;	/* STARTING state timer. */
196	int			 mdio_disks_present;
197	int			 mdio_started;
198	int			 mdio_incomplete;
199	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
200};
201
202static g_raid_md_create_t g_raid_md_create_intel;
203static g_raid_md_taste_t g_raid_md_taste_intel;
204static g_raid_md_event_t g_raid_md_event_intel;
205static g_raid_md_ctl_t g_raid_md_ctl_intel;
206static g_raid_md_write_t g_raid_md_write_intel;
207static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
208static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
209static g_raid_md_free_t g_raid_md_free_intel;
210
211static kobj_method_t g_raid_md_intel_methods[] = {
212	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_intel),
213	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_intel),
214	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_intel),
215	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_intel),
216	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_intel),
217	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_intel),
218	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_intel),
219	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_intel),
220	{ 0, 0 }
221};
222
223static struct g_raid_md_class g_raid_md_intel_class = {
224	"Intel",
225	g_raid_md_intel_methods,
226	sizeof(struct g_raid_md_intel_object),
227	.mdc_priority = 100
228};
229
230
231static struct intel_raid_map *
232intel_get_map(struct intel_raid_vol *mvol, int i)
233{
234	struct intel_raid_map *mmap;
235
236	if (i > (mvol->migr_state ? 1 : 0))
237		return (NULL);
238	mmap = &mvol->map[0];
239	for (; i > 0; i--) {
240		mmap = (struct intel_raid_map *)
241		    &mmap->disk_idx[mmap->total_disks];
242	}
243	return ((struct intel_raid_map *)mmap);
244}
245
246static struct intel_raid_vol *
247intel_get_volume(struct intel_raid_conf *meta, int i)
248{
249	struct intel_raid_vol *mvol;
250	struct intel_raid_map *mmap;
251
252	if (i > 1)
253		return (NULL);
254	mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
255	for (; i > 0; i--) {
256		mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
257		mvol = (struct intel_raid_vol *)
258		    &mmap->disk_idx[mmap->total_disks];
259	}
260	return (mvol);
261}
262
263static off_t
264intel_get_map_offset(struct intel_raid_map *mmap)
265{
266	off_t offset = (off_t)mmap->offset_hi << 32;
267
268	offset += mmap->offset;
269	return (offset);
270}
271
272static void
273intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
274{
275
276	mmap->offset = offset & 0xffffffff;
277	mmap->offset_hi = offset >> 32;
278}
279
280static off_t
281intel_get_map_disk_sectors(struct intel_raid_map *mmap)
282{
283	off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
284
285	disk_sectors += mmap->disk_sectors;
286	return (disk_sectors);
287}
288
289static void
290intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
291{
292
293	mmap->disk_sectors = disk_sectors & 0xffffffff;
294	mmap->disk_sectors_hi = disk_sectors >> 32;
295}
296
297static void
298intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
299{
300
301	mmap->stripe_count = stripe_count & 0xffffffff;
302	mmap->stripe_count_hi = stripe_count >> 32;
303}
304
305static off_t
306intel_get_disk_sectors(struct intel_raid_disk *disk)
307{
308	off_t sectors = (off_t)disk->sectors_hi << 32;
309
310	sectors += disk->sectors;
311	return (sectors);
312}
313
314static void
315intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
316{
317
318	disk->sectors = sectors & 0xffffffff;
319	disk->sectors_hi = sectors >> 32;
320}
321
322static off_t
323intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
324{
325	off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
326
327	curr_migr_unit += vol->curr_migr_unit;
328	return (curr_migr_unit);
329}
330
331static void
332intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
333{
334
335	vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
336	vol->curr_migr_unit_hi = curr_migr_unit >> 32;
337}
338
339static void
340g_raid_md_intel_print(struct intel_raid_conf *meta)
341{
342	struct intel_raid_vol *mvol;
343	struct intel_raid_map *mmap;
344	int i, j, k;
345
346	if (g_raid_debug < 1)
347		return;
348
349	printf("********* ATA Intel MatrixRAID Metadata *********\n");
350	printf("intel_id            <%.24s>\n", meta->intel_id);
351	printf("version             <%.6s>\n", meta->version);
352	printf("checksum            0x%08x\n", meta->checksum);
353	printf("config_size         0x%08x\n", meta->config_size);
354	printf("config_id           0x%08x\n", meta->config_id);
355	printf("generation          0x%08x\n", meta->generation);
356	printf("attributes          0x%08x\n", meta->attributes);
357	printf("total_disks         %u\n", meta->total_disks);
358	printf("total_volumes       %u\n", meta->total_volumes);
359	printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags\n");
360	for (i = 0; i < meta->total_disks; i++ ) {
361		printf("    %d   <%.16s> %u %u 0x%08x 0x%08x\n", i,
362		    meta->disk[i].serial, meta->disk[i].sectors,
363		    meta->disk[i].sectors_hi,
364		    meta->disk[i].id, meta->disk[i].flags);
365	}
366	for (i = 0; i < meta->total_volumes; i++) {
367		mvol = intel_get_volume(meta, i);
368		printf(" ****** Volume %d ******\n", i);
369		printf(" name               %.16s\n", mvol->name);
370		printf(" total_sectors      %ju\n", mvol->total_sectors);
371		printf(" state              %u\n", mvol->state);
372		printf(" reserved           %u\n", mvol->reserved);
373		printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
374		printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
375		printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
376		printf(" migr_state         %u\n", mvol->migr_state);
377		printf(" migr_type          %u\n", mvol->migr_type);
378		printf(" dirty              %u\n", mvol->dirty);
379
380		for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
381			printf("  *** Map %d ***\n", j);
382			mmap = intel_get_map(mvol, j);
383			printf("  offset            %u\n", mmap->offset);
384			printf("  offset_hi         %u\n", mmap->offset_hi);
385			printf("  disk_sectors      %u\n", mmap->disk_sectors);
386			printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
387			printf("  stripe_count      %u\n", mmap->stripe_count);
388			printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
389			printf("  strip_sectors     %u\n", mmap->strip_sectors);
390			printf("  status            %u\n", mmap->status);
391			printf("  type              %u\n", mmap->type);
392			printf("  total_disks       %u\n", mmap->total_disks);
393			printf("  total_domains     %u\n", mmap->total_domains);
394			printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
395			printf("  ddf               %u\n", mmap->ddf);
396			printf("  disk_idx         ");
397			for (k = 0; k < mmap->total_disks; k++)
398				printf(" 0x%08x", mmap->disk_idx[k]);
399			printf("\n");
400		}
401	}
402	printf("=================================================\n");
403}
404
405static struct intel_raid_conf *
406intel_meta_copy(struct intel_raid_conf *meta)
407{
408	struct intel_raid_conf *nmeta;
409
410	nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
411	memcpy(nmeta, meta, meta->config_size);
412	return (nmeta);
413}
414
415static int
416intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
417{
418	int pos;
419
420	for (pos = 0; pos < meta->total_disks; pos++) {
421		if (strncmp(meta->disk[pos].serial,
422		    serial, INTEL_SERIAL_LEN) == 0)
423			return (pos);
424	}
425	return (-1);
426}
427
428static struct intel_raid_conf *
429intel_meta_read(struct g_consumer *cp)
430{
431	struct g_provider *pp;
432	struct intel_raid_conf *meta;
433	struct intel_raid_vol *mvol;
434	struct intel_raid_map *mmap;
435	char *buf;
436	int error, i, j, k, left, size;
437	uint32_t checksum, *ptr;
438
439	pp = cp->provider;
440
441	/* Read the anchor sector. */
442	buf = g_read_data(cp,
443	    pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
444	if (buf == NULL) {
445		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
446		    pp->name, error);
447		return (NULL);
448	}
449	meta = (struct intel_raid_conf *)buf;
450
451	/* Check if this is an Intel RAID struct */
452	if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
453		G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
454		g_free(buf);
455		return (NULL);
456	}
457	if (meta->config_size > 65536 ||
458	    meta->config_size < sizeof(struct intel_raid_conf)) {
459		G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
460		    meta->config_size);
461		g_free(buf);
462		return (NULL);
463	}
464	size = meta->config_size;
465	meta = malloc(size, M_MD_INTEL, M_WAITOK);
466	memcpy(meta, buf, min(size, pp->sectorsize));
467	g_free(buf);
468
469	/* Read all the rest, if needed. */
470	if (meta->config_size > pp->sectorsize) {
471		left = (meta->config_size - 1) / pp->sectorsize;
472		buf = g_read_data(cp,
473		    pp->mediasize - pp->sectorsize * (2 + left),
474		    pp->sectorsize * left, &error);
475		if (buf == NULL) {
476			G_RAID_DEBUG(1, "Cannot read remaining metadata"
477			    " part from %s (error=%d).",
478			    pp->name, error);
479			free(meta, M_MD_INTEL);
480			return (NULL);
481		}
482		memcpy(((char *)meta) + pp->sectorsize, buf,
483		    pp->sectorsize * left);
484		g_free(buf);
485	}
486
487	/* Check metadata checksum. */
488	for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
489	    i < (meta->config_size / sizeof(uint32_t)); i++) {
490		checksum += *ptr++;
491	}
492	checksum -= meta->checksum;
493	if (checksum != meta->checksum) {
494		G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
495		free(meta, M_MD_INTEL);
496		return (NULL);
497	}
498
499	/* Validate metadata size. */
500	size = sizeof(struct intel_raid_conf) +
501	    sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
502	    sizeof(struct intel_raid_vol) * meta->total_volumes;
503	if (size > meta->config_size) {
504badsize:
505		G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
506		    meta->config_size, size);
507		free(meta, M_MD_INTEL);
508		return (NULL);
509	}
510	for (i = 0; i < meta->total_volumes; i++) {
511		mvol = intel_get_volume(meta, i);
512		mmap = intel_get_map(mvol, 0);
513		size += 4 * (mmap->total_disks - 1);
514		if (size > meta->config_size)
515			goto badsize;
516		if (mvol->migr_state) {
517			size += sizeof(struct intel_raid_map);
518			if (size > meta->config_size)
519				goto badsize;
520			mmap = intel_get_map(mvol, 1);
521			size += 4 * (mmap->total_disks - 1);
522			if (size > meta->config_size)
523				goto badsize;
524		}
525	}
526
527	/* Validate disk indexes. */
528	for (i = 0; i < meta->total_volumes; i++) {
529		mvol = intel_get_volume(meta, i);
530		for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
531			mmap = intel_get_map(mvol, j);
532			for (k = 0; k < mmap->total_disks; k++) {
533				if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
534				    meta->total_disks) {
535					G_RAID_DEBUG(1, "Intel metadata disk"
536					    " index %d too big (>%d)",
537					    mmap->disk_idx[k] & INTEL_DI_IDX,
538					    meta->total_disks);
539					free(meta, M_MD_INTEL);
540					return (NULL);
541				}
542			}
543		}
544	}
545
546	/* Validate migration types. */
547	for (i = 0; i < meta->total_volumes; i++) {
548		mvol = intel_get_volume(meta, i);
549		if (mvol->migr_state &&
550		    mvol->migr_type != INTEL_MT_INIT &&
551		    mvol->migr_type != INTEL_MT_REBUILD &&
552		    mvol->migr_type != INTEL_MT_VERIFY &&
553		    mvol->migr_type != INTEL_MT_REPAIR) {
554			G_RAID_DEBUG(1, "Intel metadata has unsupported"
555			    " migration type %d", mvol->migr_type);
556			free(meta, M_MD_INTEL);
557			return (NULL);
558		}
559	}
560
561	return (meta);
562}
563
564static int
565intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
566{
567	struct g_provider *pp;
568	char *buf;
569	int error, i, sectors;
570	uint32_t checksum, *ptr;
571
572	pp = cp->provider;
573
574	/* Recalculate checksum for case if metadata were changed. */
575	meta->checksum = 0;
576	for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
577	    i < (meta->config_size / sizeof(uint32_t)); i++) {
578		checksum += *ptr++;
579	}
580	meta->checksum = checksum;
581
582	/* Create and fill buffer. */
583	sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
584	buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
585	if (sectors > 1) {
586		memcpy(buf, ((char *)meta) + pp->sectorsize,
587		    (sectors - 1) * pp->sectorsize);
588	}
589	memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
590
591	error = g_write_data(cp,
592	    pp->mediasize - pp->sectorsize * (1 + sectors),
593	    buf, pp->sectorsize * sectors);
594	if (error != 0) {
595		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
596		    pp->name, error);
597	}
598
599	free(buf, M_MD_INTEL);
600	return (error);
601}
602
603static int
604intel_meta_erase(struct g_consumer *cp)
605{
606	struct g_provider *pp;
607	char *buf;
608	int error;
609
610	pp = cp->provider;
611	buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
612	error = g_write_data(cp,
613	    pp->mediasize - 2 * pp->sectorsize,
614	    buf, pp->sectorsize);
615	if (error != 0) {
616		G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
617		    pp->name, error);
618	}
619	free(buf, M_MD_INTEL);
620	return (error);
621}
622
623static int
624intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
625{
626	struct intel_raid_conf *meta;
627	int error;
628
629	/* Fill anchor and single disk. */
630	meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
631	memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
632	memcpy(&meta->version[0], INTEL_VERSION_1000,
633	    sizeof(INTEL_VERSION_1000) - 1);
634	meta->config_size = INTEL_MAX_MD_SIZE(1);
635	meta->config_id = arc4random();
636	meta->generation = 1;
637	meta->total_disks = 1;
638	meta->disk[0] = *d;
639	error = intel_meta_write(cp, meta);
640	free(meta, M_MD_INTEL);
641	return (error);
642}
643
644static struct g_raid_disk *
645g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
646{
647	struct g_raid_disk	*disk;
648	struct g_raid_md_intel_perdisk *pd;
649
650	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
651		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
652		if (pd->pd_disk_pos == id)
653			break;
654	}
655	return (disk);
656}
657
658static int
659g_raid_md_intel_supported(int level, int qual, int disks, int force)
660{
661
662	switch (level) {
663	case G_RAID_VOLUME_RL_RAID0:
664		if (disks < 1)
665			return (0);
666		if (!force && (disks < 2 || disks > 6))
667			return (0);
668		break;
669	case G_RAID_VOLUME_RL_RAID1:
670		if (disks < 1)
671			return (0);
672		if (!force && (disks != 2))
673			return (0);
674		break;
675	case G_RAID_VOLUME_RL_RAID1E:
676		if (disks < 2)
677			return (0);
678		if (!force && (disks != 4))
679			return (0);
680		break;
681	case G_RAID_VOLUME_RL_RAID5:
682		if (disks < 3)
683			return (0);
684		if (!force && disks > 6)
685			return (0);
686		break;
687	default:
688		return (0);
689	}
690	if (qual != G_RAID_VOLUME_RLQ_NONE)
691		return (0);
692	return (1);
693}
694
695static struct g_raid_volume *
696g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
697{
698	struct g_raid_volume	*mvol;
699
700	TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
701		if ((intptr_t)(mvol->v_md_data) == id)
702			break;
703	}
704	return (mvol);
705}
706
707static int
708g_raid_md_intel_start_disk(struct g_raid_disk *disk)
709{
710	struct g_raid_softc *sc;
711	struct g_raid_subdisk *sd, *tmpsd;
712	struct g_raid_disk *olddisk, *tmpdisk;
713	struct g_raid_md_object *md;
714	struct g_raid_md_intel_object *mdi;
715	struct g_raid_md_intel_perdisk *pd, *oldpd;
716	struct intel_raid_conf *meta;
717	struct intel_raid_vol *mvol;
718	struct intel_raid_map *mmap0, *mmap1;
719	int disk_pos, resurrection = 0;
720
721	sc = disk->d_softc;
722	md = sc->sc_md;
723	mdi = (struct g_raid_md_intel_object *)md;
724	meta = mdi->mdio_meta;
725	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
726	olddisk = NULL;
727
728	/* Find disk position in metadata by it's serial. */
729	disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
730	if (disk_pos < 0) {
731		G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
732		/* Failed stale disk is useless for us. */
733		if (pd->pd_disk_meta.flags & INTEL_F_FAILED) {
734			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
735			return (0);
736		}
737		/* If we are in the start process, that's all for now. */
738		if (!mdi->mdio_started)
739			goto nofit;
740		/*
741		 * If we have already started - try to get use of the disk.
742		 * Try to replace OFFLINE disks first, then FAILED.
743		 */
744		TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
745			if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
746			    tmpdisk->d_state != G_RAID_DISK_S_FAILED)
747				continue;
748			/* Make sure this disk is big enough. */
749			TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
750				off_t disk_sectors =
751				    intel_get_disk_sectors(&pd->pd_disk_meta);
752
753				if (sd->sd_offset + sd->sd_size + 4096 >
754				    disk_sectors * 512) {
755					G_RAID_DEBUG1(1, sc,
756					    "Disk too small (%llu < %llu)",
757					    (unsigned long long)
758					    disk_sectors * 512,
759					    (unsigned long long)
760					    sd->sd_offset + sd->sd_size + 4096);
761					break;
762				}
763			}
764			if (sd != NULL)
765				continue;
766			if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
767				olddisk = tmpdisk;
768				break;
769			} else if (olddisk == NULL)
770				olddisk = tmpdisk;
771		}
772		if (olddisk == NULL) {
773nofit:
774			if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
775				g_raid_change_disk_state(disk,
776				    G_RAID_DISK_S_SPARE);
777				return (1);
778			} else {
779				g_raid_change_disk_state(disk,
780				    G_RAID_DISK_S_STALE);
781				return (0);
782			}
783		}
784		oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
785		disk_pos = oldpd->pd_disk_pos;
786		resurrection = 1;
787	}
788
789	if (olddisk == NULL) {
790		/* Find placeholder by position. */
791		olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
792		if (olddisk == NULL)
793			panic("No disk at position %d!", disk_pos);
794		if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
795			G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
796			    disk_pos);
797			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
798			return (0);
799		}
800		oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
801	}
802
803	/* Replace failed disk or placeholder with new disk. */
804	TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
805		TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
806		TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
807		sd->sd_disk = disk;
808	}
809	oldpd->pd_disk_pos = -2;
810	pd->pd_disk_pos = disk_pos;
811
812	/* If it was placeholder -- destroy it. */
813	if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
814		g_raid_destroy_disk(olddisk);
815	} else {
816		/* Otherwise, make it STALE_FAILED. */
817		g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
818		/* Update global metadata just in case. */
819		memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
820		    sizeof(struct intel_raid_disk));
821	}
822
823	/* Welcome the new disk. */
824	if (resurrection)
825		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
826	else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
827		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
828	else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
829		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
830	else
831		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
832	TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
833		mvol = intel_get_volume(meta,
834		    (uintptr_t)(sd->sd_volume->v_md_data));
835		mmap0 = intel_get_map(mvol, 0);
836		if (mvol->migr_state)
837			mmap1 = intel_get_map(mvol, 1);
838		else
839			mmap1 = mmap0;
840
841		if (resurrection) {
842			/* Stale disk, almost same as new. */
843			g_raid_change_subdisk_state(sd,
844			    G_RAID_SUBDISK_S_NEW);
845		} else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
846			/* Failed disk, almost useless. */
847			g_raid_change_subdisk_state(sd,
848			    G_RAID_SUBDISK_S_FAILED);
849		} else if (mvol->migr_state == 0) {
850			if (mmap0->status == INTEL_S_UNINITIALIZED) {
851				/* Freshly created uninitialized volume. */
852				g_raid_change_subdisk_state(sd,
853				    G_RAID_SUBDISK_S_UNINITIALIZED);
854			} else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
855				/* Freshly inserted disk. */
856				g_raid_change_subdisk_state(sd,
857				    G_RAID_SUBDISK_S_NEW);
858			} else if (mvol->dirty) {
859				/* Dirty volume (unclean shutdown). */
860				g_raid_change_subdisk_state(sd,
861				    G_RAID_SUBDISK_S_STALE);
862			} else {
863				/* Up to date disk. */
864				g_raid_change_subdisk_state(sd,
865				    G_RAID_SUBDISK_S_ACTIVE);
866			}
867		} else if (mvol->migr_type == INTEL_MT_INIT ||
868			   mvol->migr_type == INTEL_MT_REBUILD) {
869			if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
870				/* Freshly inserted disk. */
871				g_raid_change_subdisk_state(sd,
872				    G_RAID_SUBDISK_S_NEW);
873			} else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
874				/* Rebuilding disk. */
875				g_raid_change_subdisk_state(sd,
876				    G_RAID_SUBDISK_S_REBUILD);
877				if (mvol->dirty) {
878					sd->sd_rebuild_pos = 0;
879				} else {
880					sd->sd_rebuild_pos =
881					    intel_get_vol_curr_migr_unit(mvol) *
882					    sd->sd_volume->v_strip_size *
883					    mmap0->total_domains;
884				}
885			} else if (mvol->dirty) {
886				/* Dirty volume (unclean shutdown). */
887				g_raid_change_subdisk_state(sd,
888				    G_RAID_SUBDISK_S_STALE);
889			} else {
890				/* Up to date disk. */
891				g_raid_change_subdisk_state(sd,
892				    G_RAID_SUBDISK_S_ACTIVE);
893			}
894		} else if (mvol->migr_type == INTEL_MT_VERIFY ||
895			   mvol->migr_type == INTEL_MT_REPAIR) {
896			if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
897				/* Freshly inserted disk. */
898				g_raid_change_subdisk_state(sd,
899				    G_RAID_SUBDISK_S_NEW);
900			} else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
901				/* Resyncing disk. */
902				g_raid_change_subdisk_state(sd,
903				    G_RAID_SUBDISK_S_RESYNC);
904				if (mvol->dirty) {
905					sd->sd_rebuild_pos = 0;
906				} else {
907					sd->sd_rebuild_pos =
908					    intel_get_vol_curr_migr_unit(mvol) *
909					    sd->sd_volume->v_strip_size *
910					    mmap0->total_domains;
911				}
912			} else if (mvol->dirty) {
913				/* Dirty volume (unclean shutdown). */
914				g_raid_change_subdisk_state(sd,
915				    G_RAID_SUBDISK_S_STALE);
916			} else {
917				/* Up to date disk. */
918				g_raid_change_subdisk_state(sd,
919				    G_RAID_SUBDISK_S_ACTIVE);
920			}
921		}
922		g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
923		    G_RAID_EVENT_SUBDISK);
924	}
925
926	/* Update status of our need for spare. */
927	if (mdi->mdio_started) {
928		mdi->mdio_incomplete =
929		    (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
930		     meta->total_disks);
931	}
932
933	return (resurrection);
934}
935
936static void
937g_disk_md_intel_retaste(void *arg, int pending)
938{
939
940	G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
941	g_retaste(&g_raid_class);
942	free(arg, M_MD_INTEL);
943}
944
945static void
946g_raid_md_intel_refill(struct g_raid_softc *sc)
947{
948	struct g_raid_md_object *md;
949	struct g_raid_md_intel_object *mdi;
950	struct intel_raid_conf *meta;
951	struct g_raid_disk *disk;
952	struct task *task;
953	int update, na;
954
955	md = sc->sc_md;
956	mdi = (struct g_raid_md_intel_object *)md;
957	meta = mdi->mdio_meta;
958	update = 0;
959	do {
960		/* Make sure we miss anything. */
961		na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
962		if (na == meta->total_disks)
963			break;
964
965		G_RAID_DEBUG1(1, md->mdo_softc,
966		    "Array is not complete (%d of %d), "
967		    "trying to refill.", na, meta->total_disks);
968
969		/* Try to get use some of STALE disks. */
970		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
971			if (disk->d_state == G_RAID_DISK_S_STALE) {
972				update += g_raid_md_intel_start_disk(disk);
973				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
974					break;
975			}
976		}
977		if (disk != NULL)
978			continue;
979
980		/* Try to get use some of SPARE disks. */
981		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
982			if (disk->d_state == G_RAID_DISK_S_SPARE) {
983				update += g_raid_md_intel_start_disk(disk);
984				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
985					break;
986			}
987		}
988	} while (disk != NULL);
989
990	/* Write new metadata if we changed something. */
991	if (update) {
992		g_raid_md_write_intel(md, NULL, NULL, NULL);
993		meta = mdi->mdio_meta;
994	}
995
996	/* Update status of our need for spare. */
997	mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
998	    meta->total_disks);
999
1000	/* Request retaste hoping to find spare. */
1001	if (mdi->mdio_incomplete) {
1002		task = malloc(sizeof(struct task),
1003		    M_MD_INTEL, M_WAITOK | M_ZERO);
1004		TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
1005		taskqueue_enqueue(taskqueue_swi, task);
1006	}
1007}
1008
1009static void
1010g_raid_md_intel_start(struct g_raid_softc *sc)
1011{
1012	struct g_raid_md_object *md;
1013	struct g_raid_md_intel_object *mdi;
1014	struct g_raid_md_intel_perdisk *pd;
1015	struct intel_raid_conf *meta;
1016	struct intel_raid_vol *mvol;
1017	struct intel_raid_map *mmap;
1018	struct g_raid_volume *vol;
1019	struct g_raid_subdisk *sd;
1020	struct g_raid_disk *disk;
1021	int i, j, disk_pos;
1022
1023	md = sc->sc_md;
1024	mdi = (struct g_raid_md_intel_object *)md;
1025	meta = mdi->mdio_meta;
1026
1027	/* Create volumes and subdisks. */
1028	for (i = 0; i < meta->total_volumes; i++) {
1029		mvol = intel_get_volume(meta, i);
1030		mmap = intel_get_map(mvol, 0);
1031		vol = g_raid_create_volume(sc, mvol->name, -1);
1032		vol->v_md_data = (void *)(intptr_t)i;
1033		if (mmap->type == INTEL_T_RAID0)
1034			vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
1035		else if (mmap->type == INTEL_T_RAID1 &&
1036		    mmap->total_domains >= 2 &&
1037		    mmap->total_domains <= mmap->total_disks) {
1038			/* Assume total_domains is correct. */
1039			if (mmap->total_domains == mmap->total_disks)
1040				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1041			else
1042				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1043		} else if (mmap->type == INTEL_T_RAID1) {
1044			/* total_domains looks wrong. */
1045			if (mmap->total_disks <= 2)
1046				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1047			else
1048				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1049		} else if (mmap->type == INTEL_T_RAID5)
1050			vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
1051		else
1052			vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
1053		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
1054		vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
1055		vol->v_disks_count = mmap->total_disks;
1056		vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
1057		vol->v_sectorsize = 512; //ZZZ
1058		for (j = 0; j < vol->v_disks_count; j++) {
1059			sd = &vol->v_subdisks[j];
1060			sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
1061			sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
1062		}
1063		g_raid_start_volume(vol);
1064	}
1065
1066	/* Create disk placeholders to store data for later writing. */
1067	for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
1068		pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1069		pd->pd_disk_pos = disk_pos;
1070		pd->pd_disk_meta = meta->disk[disk_pos];
1071		disk = g_raid_create_disk(sc);
1072		disk->d_md_data = (void *)pd;
1073		disk->d_state = G_RAID_DISK_S_OFFLINE;
1074		for (i = 0; i < meta->total_volumes; i++) {
1075			mvol = intel_get_volume(meta, i);
1076			mmap = intel_get_map(mvol, 0);
1077			for (j = 0; j < mmap->total_disks; j++) {
1078				if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
1079					break;
1080			}
1081			if (j == mmap->total_disks)
1082				continue;
1083			vol = g_raid_md_intel_get_volume(sc, i);
1084			sd = &vol->v_subdisks[j];
1085			sd->sd_disk = disk;
1086			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1087		}
1088	}
1089
1090	/* Make all disks found till the moment take their places. */
1091	do {
1092		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1093			if (disk->d_state == G_RAID_DISK_S_NONE) {
1094				g_raid_md_intel_start_disk(disk);
1095				break;
1096			}
1097		}
1098	} while (disk != NULL);
1099
1100	mdi->mdio_started = 1;
1101	G_RAID_DEBUG1(0, sc, "Array started.");
1102	g_raid_md_write_intel(md, NULL, NULL, NULL);
1103
1104	/* Pickup any STALE/SPARE disks to refill array if needed. */
1105	g_raid_md_intel_refill(sc);
1106
1107	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1108		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1109		    G_RAID_EVENT_VOLUME);
1110	}
1111
1112	callout_stop(&mdi->mdio_start_co);
1113	G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
1114	root_mount_rel(mdi->mdio_rootmount);
1115	mdi->mdio_rootmount = NULL;
1116}
1117
1118static void
1119g_raid_md_intel_new_disk(struct g_raid_disk *disk)
1120{
1121	struct g_raid_softc *sc;
1122	struct g_raid_md_object *md;
1123	struct g_raid_md_intel_object *mdi;
1124	struct intel_raid_conf *pdmeta;
1125	struct g_raid_md_intel_perdisk *pd;
1126
1127	sc = disk->d_softc;
1128	md = sc->sc_md;
1129	mdi = (struct g_raid_md_intel_object *)md;
1130	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1131	pdmeta = pd->pd_meta;
1132
1133	if (mdi->mdio_started) {
1134		if (g_raid_md_intel_start_disk(disk))
1135			g_raid_md_write_intel(md, NULL, NULL, NULL);
1136	} else {
1137		/* If we haven't started yet - check metadata freshness. */
1138		if (mdi->mdio_meta == NULL ||
1139		    ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
1140			G_RAID_DEBUG1(1, sc, "Newer disk");
1141			if (mdi->mdio_meta != NULL)
1142				free(mdi->mdio_meta, M_MD_INTEL);
1143			mdi->mdio_meta = intel_meta_copy(pdmeta);
1144			mdi->mdio_generation = mdi->mdio_meta->generation;
1145			mdi->mdio_disks_present = 1;
1146		} else if (pdmeta->generation == mdi->mdio_generation) {
1147			mdi->mdio_disks_present++;
1148			G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
1149			    mdi->mdio_disks_present,
1150			    mdi->mdio_meta->total_disks);
1151		} else {
1152			G_RAID_DEBUG1(1, sc, "Older disk");
1153		}
1154		/* If we collected all needed disks - start array. */
1155		if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
1156			g_raid_md_intel_start(sc);
1157	}
1158}
1159
1160static void
1161g_raid_intel_go(void *arg)
1162{
1163	struct g_raid_softc *sc;
1164	struct g_raid_md_object *md;
1165	struct g_raid_md_intel_object *mdi;
1166
1167	sc = arg;
1168	md = sc->sc_md;
1169	mdi = (struct g_raid_md_intel_object *)md;
1170	if (!mdi->mdio_started) {
1171		G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
1172		g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
1173	}
1174}
1175
1176static int
1177g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
1178    struct g_geom **gp)
1179{
1180	struct g_raid_softc *sc;
1181	struct g_raid_md_intel_object *mdi;
1182	char name[16];
1183
1184	mdi = (struct g_raid_md_intel_object *)md;
1185	mdi->mdio_config_id = arc4random();
1186	mdi->mdio_generation = 0;
1187	snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
1188	sc = g_raid_create_node(mp, name, md);
1189	if (sc == NULL)
1190		return (G_RAID_MD_TASTE_FAIL);
1191	md->mdo_softc = sc;
1192	*gp = sc->sc_geom;
1193	return (G_RAID_MD_TASTE_NEW);
1194}
1195
/*
 * Return the last N characters of the serial label.  The Linux and
 * ataraid(7) code always uses the last 16 characters of the label to
 * store into the Intel meta format.  Generalize this to N characters
 * since that's easy.  Labels can be up to 20 characters for SATA drives
 * and up to 251 characters for SAS drives.  Since Intel controllers don't
 * support SAS drives, just stick with the SATA limits for stack friendliness.
 *
 * cp     - consumer to query for the "GEOM::ident" attribute.
 * serial - output buffer of at least serlen bytes.
 * serlen - number of trailing label characters wanted.
 *
 * Returns 0 on success or the error from g_io_getattr().
 *
 * NOTE: the output is a fixed-width field written with strncpy(): it is
 * zero-padded when the label is shorter than serlen and is NOT
 * NUL-terminated when the label has serlen or more characters.  Callers
 * must not treat it as a C string without bounding the length.
 */
static int
g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
{
	char serial_buffer[24];
	int len, error;

	/*
	 * Start from a zeroed buffer so that a provider which writes less
	 * than it was offered cannot leave stack garbage behind the label.
	 */
	memset(serial_buffer, 0, sizeof(serial_buffer));
	len = sizeof(serial_buffer);
	error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
	if (error != 0)
		return (error);

	/*
	 * Nothing here guarantees the attribute came back NUL-terminated;
	 * force termination so strlen() below cannot run off the buffer.
	 */
	serial_buffer[sizeof(serial_buffer) - 1] = '\0';

	/* Skip leading characters so that at most serlen remain. */
	len = strlen(serial_buffer);
	if (len > serlen)
		len -= serlen;
	else
		len = 0;
	strncpy(serial, serial_buffer + len, serlen);
	return (0);
}
1222
1223static int
1224g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
1225                              struct g_consumer *cp, struct g_geom **gp)
1226{
1227	struct g_consumer *rcp;
1228	struct g_provider *pp;
1229	struct g_raid_md_intel_object *mdi, *mdi1;
1230	struct g_raid_softc *sc;
1231	struct g_raid_disk *disk;
1232	struct intel_raid_conf *meta;
1233	struct g_raid_md_intel_perdisk *pd;
1234	struct g_geom *geom;
1235	int error, disk_pos, result, spare, len;
1236	char serial[INTEL_SERIAL_LEN];
1237	char name[16];
1238	uint16_t vendor;
1239
1240	G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
1241	mdi = (struct g_raid_md_intel_object *)md;
1242	pp = cp->provider;
1243
1244	/* Read metadata from device. */
1245	meta = NULL;
1246	vendor = 0xffff;
1247	disk_pos = 0;
1248	if (g_access(cp, 1, 0, 0) != 0)
1249		return (G_RAID_MD_TASTE_FAIL);
1250	g_topology_unlock();
1251	error = g_raid_md_get_label(cp, serial, sizeof(serial));
1252	if (error != 0) {
1253		G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
1254		    pp->name, error);
1255		goto fail2;
1256	}
1257	len = 2;
1258	if (pp->geom->rank == 1)
1259		g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
1260	meta = intel_meta_read(cp);
1261	g_topology_lock();
1262	g_access(cp, -1, 0, 0);
1263	if (meta == NULL) {
1264		if (g_raid_aggressive_spare) {
1265			if (vendor != 0x8086) {
1266				G_RAID_DEBUG(1,
1267				    "Intel vendor mismatch 0x%04x != 0x8086",
1268				    vendor);
1269			} else {
1270				G_RAID_DEBUG(1,
1271				    "No Intel metadata, forcing spare.");
1272				spare = 2;
1273				goto search;
1274			}
1275		}
1276		return (G_RAID_MD_TASTE_FAIL);
1277	}
1278
1279	/* Check this disk position in obtained metadata. */
1280	disk_pos = intel_meta_find_disk(meta, serial);
1281	if (disk_pos < 0) {
1282		G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
1283		goto fail1;
1284	}
1285	if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
1286	    (pp->mediasize / pp->sectorsize)) {
1287		G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
1288		    intel_get_disk_sectors(&meta->disk[disk_pos]),
1289		    (off_t)(pp->mediasize / pp->sectorsize));
1290		goto fail1;
1291	}
1292
1293	/* Metadata valid. Print it. */
1294	g_raid_md_intel_print(meta);
1295	G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
1296	spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
1297
1298search:
1299	/* Search for matching node. */
1300	sc = NULL;
1301	mdi1 = NULL;
1302	LIST_FOREACH(geom, &mp->geom, geom) {
1303		sc = geom->softc;
1304		if (sc == NULL)
1305			continue;
1306		if (sc->sc_stopping != 0)
1307			continue;
1308		if (sc->sc_md->mdo_class != md->mdo_class)
1309			continue;
1310		mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
1311		if (spare) {
1312			if (mdi1->mdio_incomplete)
1313				break;
1314		} else {
1315			if (mdi1->mdio_config_id == meta->config_id)
1316				break;
1317		}
1318	}
1319
1320	/* Found matching node. */
1321	if (geom != NULL) {
1322		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
1323		result = G_RAID_MD_TASTE_EXISTING;
1324
1325	} else if (spare) { /* Not found needy node -- left for later. */
1326		G_RAID_DEBUG(1, "Spare is not needed at this time");
1327		goto fail1;
1328
1329	} else { /* Not found matching node -- create one. */
1330		result = G_RAID_MD_TASTE_NEW;
1331		mdi->mdio_config_id = meta->config_id;
1332		snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
1333		sc = g_raid_create_node(mp, name, md);
1334		md->mdo_softc = sc;
1335		geom = sc->sc_geom;
1336		callout_init(&mdi->mdio_start_co, 1);
1337		callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
1338		    g_raid_intel_go, sc);
1339		mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
1340		G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
1341	}
1342
1343	rcp = g_new_consumer(geom);
1344	g_attach(rcp, pp);
1345	if (g_access(rcp, 1, 1, 1) != 0)
1346		; //goto fail1;
1347
1348	g_topology_unlock();
1349	sx_xlock(&sc->sc_lock);
1350
1351	pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1352	pd->pd_meta = meta;
1353	pd->pd_disk_pos = -1;
1354	if (spare == 2) {
1355		memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
1356		intel_set_disk_sectors(&pd->pd_disk_meta,
1357		    pp->mediasize / pp->sectorsize);
1358		pd->pd_disk_meta.id = 0;
1359		pd->pd_disk_meta.flags = INTEL_F_SPARE;
1360	} else {
1361		pd->pd_disk_meta = meta->disk[disk_pos];
1362	}
1363	disk = g_raid_create_disk(sc);
1364	disk->d_md_data = (void *)pd;
1365	disk->d_consumer = rcp;
1366	rcp->private = disk;
1367
1368	/* Read kernel dumping information. */
1369	disk->d_kd.offset = 0;
1370	disk->d_kd.length = OFF_MAX;
1371	len = sizeof(disk->d_kd);
1372	error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
1373	if (disk->d_kd.di.dumper == NULL)
1374		G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.",
1375		    rcp->provider->name, error);
1376
1377	g_raid_md_intel_new_disk(disk);
1378
1379	sx_xunlock(&sc->sc_lock);
1380	g_topology_lock();
1381	*gp = geom;
1382	return (result);
1383fail2:
1384	g_topology_lock();
1385	g_access(cp, -1, 0, 0);
1386fail1:
1387	free(meta, M_MD_INTEL);
1388	return (G_RAID_MD_TASTE_FAIL);
1389}
1390
1391static int
1392g_raid_md_event_intel(struct g_raid_md_object *md,
1393    struct g_raid_disk *disk, u_int event)
1394{
1395	struct g_raid_softc *sc;
1396	struct g_raid_subdisk *sd;
1397	struct g_raid_md_intel_object *mdi;
1398	struct g_raid_md_intel_perdisk *pd;
1399
1400	sc = md->mdo_softc;
1401	mdi = (struct g_raid_md_intel_object *)md;
1402	if (disk == NULL) {
1403		switch (event) {
1404		case G_RAID_NODE_E_START:
1405			if (!mdi->mdio_started)
1406				g_raid_md_intel_start(sc);
1407			return (0);
1408		}
1409		return (-1);
1410	}
1411	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1412	switch (event) {
1413	case G_RAID_DISK_E_DISCONNECTED:
1414		/* If disk was assigned, just update statuses. */
1415		if (pd->pd_disk_pos >= 0) {
1416			g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1417			if (disk->d_consumer) {
1418				g_raid_kill_consumer(sc, disk->d_consumer);
1419				disk->d_consumer = NULL;
1420			}
1421			TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1422				g_raid_change_subdisk_state(sd,
1423				    G_RAID_SUBDISK_S_NONE);
1424				g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1425				    G_RAID_EVENT_SUBDISK);
1426			}
1427		} else {
1428			/* Otherwise -- delete. */
1429			g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
1430			g_raid_destroy_disk(disk);
1431		}
1432
1433		/* Write updated metadata to all disks. */
1434		g_raid_md_write_intel(md, NULL, NULL, NULL);
1435
1436		/* Check if anything left except placeholders. */
1437		if (g_raid_ndisks(sc, -1) ==
1438		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
1439			g_raid_destroy_node(sc, 0);
1440		else
1441			g_raid_md_intel_refill(sc);
1442		return (0);
1443	}
1444	return (-2);
1445}
1446
1447static int
1448g_raid_md_ctl_intel(struct g_raid_md_object *md,
1449    struct gctl_req *req)
1450{
1451	struct g_raid_softc *sc;
1452	struct g_raid_volume *vol, *vol1;
1453	struct g_raid_subdisk *sd;
1454	struct g_raid_disk *disk;
1455	struct g_raid_md_intel_object *mdi;
1456	struct g_raid_md_intel_perdisk *pd;
1457	struct g_consumer *cp;
1458	struct g_provider *pp;
1459	char arg[16], serial[INTEL_SERIAL_LEN];
1460	const char *verb, *volname, *levelname, *diskname;
1461	char *tmp;
1462	int *nargs, *force;
1463	off_t off, size, sectorsize, strip, disk_sectors;
1464	intmax_t *sizearg, *striparg;
1465	int numdisks, i, len, level, qual, update;
1466	int error;
1467
1468	sc = md->mdo_softc;
1469	mdi = (struct g_raid_md_intel_object *)md;
1470	verb = gctl_get_param(req, "verb", NULL);
1471	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1472	error = 0;
1473	if (strcmp(verb, "label") == 0) {
1474
1475		if (*nargs < 4) {
1476			gctl_error(req, "Invalid number of arguments.");
1477			return (-1);
1478		}
1479		volname = gctl_get_asciiparam(req, "arg1");
1480		if (volname == NULL) {
1481			gctl_error(req, "No volume name.");
1482			return (-2);
1483		}
1484		levelname = gctl_get_asciiparam(req, "arg2");
1485		if (levelname == NULL) {
1486			gctl_error(req, "No RAID level.");
1487			return (-3);
1488		}
1489		if (g_raid_volume_str2level(levelname, &level, &qual)) {
1490			gctl_error(req, "Unknown RAID level '%s'.", levelname);
1491			return (-4);
1492		}
1493		numdisks = *nargs - 3;
1494		force = gctl_get_paraml(req, "force", sizeof(*force));
1495		if (!g_raid_md_intel_supported(level, qual, numdisks,
1496		    force ? *force : 0)) {
1497			gctl_error(req, "Unsupported RAID level "
1498			    "(0x%02x/0x%02x), or number of disks (%d).",
1499			    level, qual, numdisks);
1500			return (-5);
1501		}
1502
1503		/* Search for disks, connect them and probe. */
1504		size = 0x7fffffffffffffffllu;
1505		sectorsize = 0;
1506		for (i = 0; i < numdisks; i++) {
1507			snprintf(arg, sizeof(arg), "arg%d", i + 3);
1508			diskname = gctl_get_asciiparam(req, arg);
1509			if (diskname == NULL) {
1510				gctl_error(req, "No disk name (%s).", arg);
1511				error = -6;
1512				break;
1513			}
1514			if (strcmp(diskname, "NONE") == 0) {
1515				cp = NULL;
1516				pp = NULL;
1517			} else {
1518				g_topology_lock();
1519				cp = g_raid_open_consumer(sc, diskname);
1520				if (cp == NULL) {
1521					gctl_error(req, "Can't open disk '%s'.",
1522					    diskname);
1523					g_topology_unlock();
1524					error = -7;
1525					break;
1526				}
1527				pp = cp->provider;
1528			}
1529			pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1530			pd->pd_disk_pos = i;
1531			disk = g_raid_create_disk(sc);
1532			disk->d_md_data = (void *)pd;
1533			disk->d_consumer = cp;
1534			if (cp == NULL) {
1535				strcpy(&pd->pd_disk_meta.serial[0], "NONE");
1536				pd->pd_disk_meta.id = 0xffffffff;
1537				pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
1538				continue;
1539			}
1540			cp->private = disk;
1541			g_topology_unlock();
1542
1543			error = g_raid_md_get_label(cp,
1544			    &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
1545			if (error != 0) {
1546				gctl_error(req,
1547				    "Can't get serial for provider '%s'.",
1548				    diskname);
1549				error = -8;
1550				break;
1551			}
1552
1553			/* Read kernel dumping information. */
1554			disk->d_kd.offset = 0;
1555			disk->d_kd.length = OFF_MAX;
1556			len = sizeof(disk->d_kd);
1557			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
1558			if (disk->d_kd.di.dumper == NULL)
1559				G_RAID_DEBUG1(2, sc,
1560				    "Dumping not supported by %s.",
1561				    cp->provider->name);
1562
1563			intel_set_disk_sectors(&pd->pd_disk_meta,
1564			    pp->mediasize / pp->sectorsize);
1565			if (size > pp->mediasize)
1566				size = pp->mediasize;
1567			if (sectorsize < pp->sectorsize)
1568				sectorsize = pp->sectorsize;
1569			pd->pd_disk_meta.id = 0;
1570			pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
1571		}
1572		if (error != 0)
1573			return (error);
1574
1575		if (sectorsize <= 0) {
1576			gctl_error(req, "Can't get sector size.");
1577			return (-8);
1578		}
1579
1580		/* Reserve some space for metadata. */
1581		size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1582
1583		/* Handle size argument. */
1584		len = sizeof(*sizearg);
1585		sizearg = gctl_get_param(req, "size", &len);
1586		if (sizearg != NULL && len == sizeof(*sizearg) &&
1587		    *sizearg > 0) {
1588			if (*sizearg > size) {
1589				gctl_error(req, "Size too big %lld > %lld.",
1590				    (long long)*sizearg, (long long)size);
1591				return (-9);
1592			}
1593			size = *sizearg;
1594		}
1595
1596		/* Handle strip argument. */
1597		strip = 131072;
1598		len = sizeof(*striparg);
1599		striparg = gctl_get_param(req, "strip", &len);
1600		if (striparg != NULL && len == sizeof(*striparg) &&
1601		    *striparg > 0) {
1602			if (*striparg < sectorsize) {
1603				gctl_error(req, "Strip size too small.");
1604				return (-10);
1605			}
1606			if (*striparg % sectorsize != 0) {
1607				gctl_error(req, "Incorrect strip size.");
1608				return (-11);
1609			}
1610			if (strip > 65535 * sectorsize) {
1611				gctl_error(req, "Strip size too big.");
1612				return (-12);
1613			}
1614			strip = *striparg;
1615		}
1616
1617		/* Round size down to strip or sector. */
1618		if (level == G_RAID_VOLUME_RL_RAID1)
1619			size -= (size % sectorsize);
1620		else if (level == G_RAID_VOLUME_RL_RAID1E &&
1621		    (numdisks & 1) != 0)
1622			size -= (size % (2 * strip));
1623		else
1624			size -= (size % strip);
1625		if (size <= 0) {
1626			gctl_error(req, "Size too small.");
1627			return (-13);
1628		}
1629
1630		/* We have all we need, create things: volume, ... */
1631		mdi->mdio_started = 1;
1632		vol = g_raid_create_volume(sc, volname, -1);
1633		vol->v_md_data = (void *)(intptr_t)0;
1634		vol->v_raid_level = level;
1635		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
1636		vol->v_strip_size = strip;
1637		vol->v_disks_count = numdisks;
1638		if (level == G_RAID_VOLUME_RL_RAID0)
1639			vol->v_mediasize = size * numdisks;
1640		else if (level == G_RAID_VOLUME_RL_RAID1)
1641			vol->v_mediasize = size;
1642		else if (level == G_RAID_VOLUME_RL_RAID5)
1643			vol->v_mediasize = size * (numdisks - 1);
1644		else { /* RAID1E */
1645			vol->v_mediasize = ((size * numdisks) / strip / 2) *
1646			    strip;
1647		}
1648		vol->v_sectorsize = sectorsize;
1649		g_raid_start_volume(vol);
1650
1651		/* , and subdisks. */
1652		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1653			pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1654			sd = &vol->v_subdisks[pd->pd_disk_pos];
1655			sd->sd_disk = disk;
1656			sd->sd_offset = 0;
1657			sd->sd_size = size;
1658			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1659			if (sd->sd_disk->d_consumer != NULL) {
1660				g_raid_change_disk_state(disk,
1661				    G_RAID_DISK_S_ACTIVE);
1662				g_raid_change_subdisk_state(sd,
1663				    G_RAID_SUBDISK_S_ACTIVE);
1664				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1665				    G_RAID_EVENT_SUBDISK);
1666			} else {
1667				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1668			}
1669		}
1670
1671		/* Write metadata based on created entities. */
1672		G_RAID_DEBUG1(0, sc, "Array started.");
1673		g_raid_md_write_intel(md, NULL, NULL, NULL);
1674
1675		/* Pickup any STALE/SPARE disks to refill array if needed. */
1676		g_raid_md_intel_refill(sc);
1677
1678		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1679		    G_RAID_EVENT_VOLUME);
1680		return (0);
1681	}
1682	if (strcmp(verb, "add") == 0) {
1683
1684		if (*nargs != 3) {
1685			gctl_error(req, "Invalid number of arguments.");
1686			return (-1);
1687		}
1688		volname = gctl_get_asciiparam(req, "arg1");
1689		if (volname == NULL) {
1690			gctl_error(req, "No volume name.");
1691			return (-2);
1692		}
1693		levelname = gctl_get_asciiparam(req, "arg2");
1694		if (levelname == NULL) {
1695			gctl_error(req, "No RAID level.");
1696			return (-3);
1697		}
1698		if (g_raid_volume_str2level(levelname, &level, &qual)) {
1699			gctl_error(req, "Unknown RAID level '%s'.", levelname);
1700			return (-4);
1701		}
1702
1703		/* Look for existing volumes. */
1704		i = 0;
1705		vol1 = NULL;
1706		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1707			vol1 = vol;
1708			i++;
1709		}
1710		if (i > 1) {
1711			gctl_error(req, "Maximum two volumes supported.");
1712			return (-6);
1713		}
1714		if (vol1 == NULL) {
1715			gctl_error(req, "At least one volume must exist.");
1716			return (-7);
1717		}
1718
1719		numdisks = vol1->v_disks_count;
1720		force = gctl_get_paraml(req, "force", sizeof(*force));
1721		if (!g_raid_md_intel_supported(level, qual, numdisks,
1722		    force ? *force : 0)) {
1723			gctl_error(req, "Unsupported RAID level "
1724			    "(0x%02x/0x%02x), or number of disks (%d).",
1725			    level, qual, numdisks);
1726			return (-5);
1727		}
1728
1729		/* Collect info about present disks. */
1730		size = 0x7fffffffffffffffllu;
1731		sectorsize = 512;
1732		for (i = 0; i < numdisks; i++) {
1733			disk = vol1->v_subdisks[i].sd_disk;
1734			pd = (struct g_raid_md_intel_perdisk *)
1735			    disk->d_md_data;
1736			disk_sectors =
1737			    intel_get_disk_sectors(&pd->pd_disk_meta);
1738
1739			if (disk_sectors * 512 < size)
1740				size = disk_sectors * 512;
1741			if (disk->d_consumer != NULL &&
1742			    disk->d_consumer->provider != NULL &&
1743			    disk->d_consumer->provider->sectorsize >
1744			     sectorsize) {
1745				sectorsize =
1746				    disk->d_consumer->provider->sectorsize;
1747			}
1748		}
1749
1750		/* Reserve some space for metadata. */
1751		size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1752
1753		/* Decide insert before or after. */
1754		sd = &vol1->v_subdisks[0];
1755		if (sd->sd_offset >
1756		    size - (sd->sd_offset + sd->sd_size)) {
1757			off = 0;
1758			size = sd->sd_offset;
1759		} else {
1760			off = sd->sd_offset + sd->sd_size;
1761			size = size - (sd->sd_offset + sd->sd_size);
1762		}
1763
1764		/* Handle strip argument. */
1765		strip = 131072;
1766		len = sizeof(*striparg);
1767		striparg = gctl_get_param(req, "strip", &len);
1768		if (striparg != NULL && len == sizeof(*striparg) &&
1769		    *striparg > 0) {
1770			if (*striparg < sectorsize) {
1771				gctl_error(req, "Strip size too small.");
1772				return (-10);
1773			}
1774			if (*striparg % sectorsize != 0) {
1775				gctl_error(req, "Incorrect strip size.");
1776				return (-11);
1777			}
1778			if (strip > 65535 * sectorsize) {
1779				gctl_error(req, "Strip size too big.");
1780				return (-12);
1781			}
1782			strip = *striparg;
1783		}
1784
1785		/* Round offset up to strip. */
1786		if (off % strip != 0) {
1787			size -= strip - off % strip;
1788			off += strip - off % strip;
1789		}
1790
1791		/* Handle size argument. */
1792		len = sizeof(*sizearg);
1793		sizearg = gctl_get_param(req, "size", &len);
1794		if (sizearg != NULL && len == sizeof(*sizearg) &&
1795		    *sizearg > 0) {
1796			if (*sizearg > size) {
1797				gctl_error(req, "Size too big %lld > %lld.",
1798				    (long long)*sizearg, (long long)size);
1799				return (-9);
1800			}
1801			size = *sizearg;
1802		}
1803
1804		/* Round size down to strip or sector. */
1805		if (level == G_RAID_VOLUME_RL_RAID1)
1806			size -= (size % sectorsize);
1807		else
1808			size -= (size % strip);
1809		if (size <= 0) {
1810			gctl_error(req, "Size too small.");
1811			return (-13);
1812		}
1813		if (size > 0xffffffffllu * sectorsize) {
1814			gctl_error(req, "Size too big.");
1815			return (-14);
1816		}
1817
1818		/* We have all we need, create things: volume, ... */
1819		vol = g_raid_create_volume(sc, volname, -1);
1820		vol->v_md_data = (void *)(intptr_t)i;
1821		vol->v_raid_level = level;
1822		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
1823		vol->v_strip_size = strip;
1824		vol->v_disks_count = numdisks;
1825		if (level == G_RAID_VOLUME_RL_RAID0)
1826			vol->v_mediasize = size * numdisks;
1827		else if (level == G_RAID_VOLUME_RL_RAID1)
1828			vol->v_mediasize = size;
1829		else if (level == G_RAID_VOLUME_RL_RAID5)
1830			vol->v_mediasize = size * (numdisks - 1);
1831		else { /* RAID1E */
1832			vol->v_mediasize = ((size * numdisks) / strip / 2) *
1833			    strip;
1834		}
1835		vol->v_sectorsize = sectorsize;
1836		g_raid_start_volume(vol);
1837
1838		/* , and subdisks. */
1839		for (i = 0; i < numdisks; i++) {
1840			disk = vol1->v_subdisks[i].sd_disk;
1841			sd = &vol->v_subdisks[i];
1842			sd->sd_disk = disk;
1843			sd->sd_offset = off;
1844			sd->sd_size = size;
1845			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1846			if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
1847				g_raid_change_subdisk_state(sd,
1848				    G_RAID_SUBDISK_S_ACTIVE);
1849				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1850				    G_RAID_EVENT_SUBDISK);
1851			}
1852		}
1853
1854		/* Write metadata based on created entities. */
1855		g_raid_md_write_intel(md, NULL, NULL, NULL);
1856
1857		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1858		    G_RAID_EVENT_VOLUME);
1859		return (0);
1860	}
1861	if (strcmp(verb, "delete") == 0) {
1862
1863		/* Full node destruction. */
1864		if (*nargs == 1) {
1865			/* Check if some volume is still open. */
1866			force = gctl_get_paraml(req, "force", sizeof(*force));
1867			if (force != NULL && *force == 0 &&
1868			    g_raid_nopens(sc) != 0) {
1869				gctl_error(req, "Some volume is still open.");
1870				return (-4);
1871			}
1872
1873			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1874				if (disk->d_consumer)
1875					intel_meta_erase(disk->d_consumer);
1876			}
1877			g_raid_destroy_node(sc, 0);
1878			return (0);
1879		}
1880
1881		/* Destroy specified volume. If it was last - all node. */
1882		if (*nargs != 2) {
1883			gctl_error(req, "Invalid number of arguments.");
1884			return (-1);
1885		}
1886		volname = gctl_get_asciiparam(req, "arg1");
1887		if (volname == NULL) {
1888			gctl_error(req, "No volume name.");
1889			return (-2);
1890		}
1891
1892		/* Search for volume. */
1893		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1894			if (strcmp(vol->v_name, volname) == 0)
1895				break;
1896		}
1897		if (vol == NULL) {
1898			i = strtol(volname, &tmp, 10);
1899			if (verb != volname && tmp[0] == 0) {
1900				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1901					if (vol->v_global_id == i)
1902						break;
1903				}
1904			}
1905		}
1906		if (vol == NULL) {
1907			gctl_error(req, "Volume '%s' not found.", volname);
1908			return (-3);
1909		}
1910
1911		/* Check if volume is still open. */
1912		force = gctl_get_paraml(req, "force", sizeof(*force));
1913		if (force != NULL && *force == 0 &&
1914		    vol->v_provider_open != 0) {
1915			gctl_error(req, "Volume is still open.");
1916			return (-4);
1917		}
1918
1919		/* Destroy volume and potentially node. */
1920		i = 0;
1921		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
1922			i++;
1923		if (i >= 2) {
1924			g_raid_destroy_volume(vol);
1925			g_raid_md_write_intel(md, NULL, NULL, NULL);
1926		} else {
1927			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1928				if (disk->d_consumer)
1929					intel_meta_erase(disk->d_consumer);
1930			}
1931			g_raid_destroy_node(sc, 0);
1932		}
1933		return (0);
1934	}
1935	if (strcmp(verb, "remove") == 0 ||
1936	    strcmp(verb, "fail") == 0) {
1937		if (*nargs < 2) {
1938			gctl_error(req, "Invalid number of arguments.");
1939			return (-1);
1940		}
1941		for (i = 1; i < *nargs; i++) {
1942			snprintf(arg, sizeof(arg), "arg%d", i);
1943			diskname = gctl_get_asciiparam(req, arg);
1944			if (diskname == NULL) {
1945				gctl_error(req, "No disk name (%s).", arg);
1946				error = -2;
1947				break;
1948			}
1949			if (strncmp(diskname, "/dev/", 5) == 0)
1950				diskname += 5;
1951
1952			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1953				if (disk->d_consumer != NULL &&
1954				    disk->d_consumer->provider != NULL &&
1955				    strcmp(disk->d_consumer->provider->name,
1956				     diskname) == 0)
1957					break;
1958			}
1959			if (disk == NULL) {
1960				gctl_error(req, "Disk '%s' not found.",
1961				    diskname);
1962				error = -3;
1963				break;
1964			}
1965
1966			if (strcmp(verb, "fail") == 0) {
1967				g_raid_md_fail_disk_intel(md, NULL, disk);
1968				continue;
1969			}
1970
1971			pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1972
1973			/* Erase metadata on deleting disk. */
1974			intel_meta_erase(disk->d_consumer);
1975
1976			/* If disk was assigned, just update statuses. */
1977			if (pd->pd_disk_pos >= 0) {
1978				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1979				g_raid_kill_consumer(sc, disk->d_consumer);
1980				disk->d_consumer = NULL;
1981				TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1982					g_raid_change_subdisk_state(sd,
1983					    G_RAID_SUBDISK_S_NONE);
1984					g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1985					    G_RAID_EVENT_SUBDISK);
1986				}
1987			} else {
1988				/* Otherwise -- delete. */
1989				g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
1990				g_raid_destroy_disk(disk);
1991			}
1992		}
1993
1994		/* Write updated metadata to remaining disks. */
1995		g_raid_md_write_intel(md, NULL, NULL, NULL);
1996
1997		/* Check if anything left except placeholders. */
1998		if (g_raid_ndisks(sc, -1) ==
1999		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2000			g_raid_destroy_node(sc, 0);
2001		else
2002			g_raid_md_intel_refill(sc);
2003		return (error);
2004	}
2005	if (strcmp(verb, "insert") == 0) {
2006		if (*nargs < 2) {
2007			gctl_error(req, "Invalid number of arguments.");
2008			return (-1);
2009		}
2010		update = 0;
2011		for (i = 1; i < *nargs; i++) {
2012			/* Get disk name. */
2013			snprintf(arg, sizeof(arg), "arg%d", i);
2014			diskname = gctl_get_asciiparam(req, arg);
2015			if (diskname == NULL) {
2016				gctl_error(req, "No disk name (%s).", arg);
2017				error = -3;
2018				break;
2019			}
2020
2021			/* Try to find provider with specified name. */
2022			g_topology_lock();
2023			cp = g_raid_open_consumer(sc, diskname);
2024			if (cp == NULL) {
2025				gctl_error(req, "Can't open disk '%s'.",
2026				    diskname);
2027				g_topology_unlock();
2028				error = -4;
2029				break;
2030			}
2031			pp = cp->provider;
2032			g_topology_unlock();
2033
2034			/* Read disk serial. */
2035			error = g_raid_md_get_label(cp,
2036			    &serial[0], INTEL_SERIAL_LEN);
2037			if (error != 0) {
2038				gctl_error(req,
2039				    "Can't get serial for provider '%s'.",
2040				    diskname);
2041				g_raid_kill_consumer(sc, cp);
2042				error = -7;
2043				break;
2044			}
2045
2046			pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
2047			pd->pd_disk_pos = -1;
2048
2049			disk = g_raid_create_disk(sc);
2050			disk->d_consumer = cp;
2051			disk->d_md_data = (void *)pd;
2052			cp->private = disk;
2053
2054			/* Read kernel dumping information. */
2055			disk->d_kd.offset = 0;
2056			disk->d_kd.length = OFF_MAX;
2057			len = sizeof(disk->d_kd);
2058			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
2059			if (disk->d_kd.di.dumper == NULL)
2060				G_RAID_DEBUG1(2, sc,
2061				    "Dumping not supported by %s.",
2062				    cp->provider->name);
2063
2064			memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
2065			    INTEL_SERIAL_LEN);
2066			intel_set_disk_sectors(&pd->pd_disk_meta,
2067			    pp->mediasize / pp->sectorsize);
2068			pd->pd_disk_meta.id = 0;
2069			pd->pd_disk_meta.flags = INTEL_F_SPARE;
2070
2071			/* Welcome the "new" disk. */
2072			update += g_raid_md_intel_start_disk(disk);
2073			if (disk->d_state == G_RAID_DISK_S_SPARE) {
2074				intel_meta_write_spare(cp, &pd->pd_disk_meta);
2075				g_raid_destroy_disk(disk);
2076			} else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2077				gctl_error(req, "Disk '%s' doesn't fit.",
2078				    diskname);
2079				g_raid_destroy_disk(disk);
2080				error = -8;
2081				break;
2082			}
2083		}
2084
2085		/* Write new metadata if we changed something. */
2086		if (update)
2087			g_raid_md_write_intel(md, NULL, NULL, NULL);
2088		return (error);
2089	}
2090	return (-100);
2091}
2092
2093static int
2094g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2095    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2096{
2097	struct g_raid_softc *sc;
2098	struct g_raid_volume *vol;
2099	struct g_raid_subdisk *sd;
2100	struct g_raid_disk *disk;
2101	struct g_raid_md_intel_object *mdi;
2102	struct g_raid_md_intel_perdisk *pd;
2103	struct intel_raid_conf *meta;
2104	struct intel_raid_vol *mvol;
2105	struct intel_raid_map *mmap0, *mmap1;
2106	off_t sectorsize = 512, pos;
2107	const char *version, *cv;
2108	int vi, sdi, numdisks, len, state, stale;
2109
2110	sc = md->mdo_softc;
2111	mdi = (struct g_raid_md_intel_object *)md;
2112
2113	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2114		return (0);
2115
2116	/* Bump generation. Newly written metadata may differ from previous. */
2117	mdi->mdio_generation++;
2118
2119	/* Count number of disks. */
2120	numdisks = 0;
2121	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2122		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2123		if (pd->pd_disk_pos < 0)
2124			continue;
2125		numdisks++;
2126		if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
2127			pd->pd_disk_meta.flags =
2128			    INTEL_F_ONLINE | INTEL_F_ASSIGNED;
2129		} else if (disk->d_state == G_RAID_DISK_S_FAILED) {
2130			pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
2131		} else {
2132			pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
2133			if (pd->pd_disk_meta.id != 0xffffffff) {
2134				pd->pd_disk_meta.id = 0xffffffff;
2135				len = strlen(pd->pd_disk_meta.serial);
2136				len = min(len, INTEL_SERIAL_LEN - 3);
2137				strcpy(pd->pd_disk_meta.serial + len, ":0");
2138			}
2139		}
2140	}
2141
2142	/* Fill anchor and disks. */
2143	meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
2144	    M_MD_INTEL, M_WAITOK | M_ZERO);
2145	memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
2146	meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
2147	meta->config_id = mdi->mdio_config_id;
2148	meta->generation = mdi->mdio_generation;
2149	meta->attributes = INTEL_ATTR_CHECKSUM;
2150	meta->total_disks = numdisks;
2151	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2152		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2153		if (pd->pd_disk_pos < 0)
2154			continue;
2155		meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
2156	}
2157
2158	/* Fill volumes and maps. */
2159	vi = 0;
2160	version = INTEL_VERSION_1000;
2161	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2162		if (vol->v_stopping)
2163			continue;
2164		mvol = intel_get_volume(meta, vi);
2165
2166		/* New metadata may have different volumes order. */
2167		vol->v_md_data = (void *)(intptr_t)vi;
2168
2169		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2170			sd = &vol->v_subdisks[sdi];
2171			if (sd->sd_disk != NULL)
2172				break;
2173		}
2174		if (sdi >= vol->v_disks_count)
2175			panic("No any filled subdisk in volume");
2176		if (vol->v_mediasize >= 0x20000000000llu)
2177			meta->attributes |= INTEL_ATTR_2TB;
2178		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2179			meta->attributes |= INTEL_ATTR_RAID0;
2180		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2181			meta->attributes |= INTEL_ATTR_RAID1;
2182		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2183			meta->attributes |= INTEL_ATTR_RAID5;
2184		else
2185			meta->attributes |= INTEL_ATTR_RAID10;
2186
2187		if (meta->attributes & INTEL_ATTR_2TB)
2188			cv = INTEL_VERSION_1300;
2189//		else if (dev->status == DEV_CLONE_N_GO)
2190//			cv = INTEL_VERSION_1206;
2191		else if (vol->v_disks_count > 4)
2192			cv = INTEL_VERSION_1204;
2193		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2194			cv = INTEL_VERSION_1202;
2195		else if (vol->v_disks_count > 2)
2196			cv = INTEL_VERSION_1201;
2197		else if (vi > 0)
2198			cv = INTEL_VERSION_1200;
2199		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2200			cv = INTEL_VERSION_1100;
2201		else
2202			cv = INTEL_VERSION_1000;
2203		if (strcmp(cv, version) > 0)
2204			version = cv;
2205
2206		strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
2207		mvol->total_sectors = vol->v_mediasize / sectorsize;
2208
2209		/* Check for any recovery in progress. */
2210		state = G_RAID_SUBDISK_S_ACTIVE;
2211		pos = 0x7fffffffffffffffllu;
2212		stale = 0;
2213		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2214			sd = &vol->v_subdisks[sdi];
2215			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
2216				state = G_RAID_SUBDISK_S_REBUILD;
2217			else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
2218			    state != G_RAID_SUBDISK_S_REBUILD)
2219				state = G_RAID_SUBDISK_S_RESYNC;
2220			else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
2221				stale = 1;
2222			if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2223			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
2224			     sd->sd_rebuild_pos < pos)
2225			        pos = sd->sd_rebuild_pos;
2226		}
2227		if (state == G_RAID_SUBDISK_S_REBUILD) {
2228			mvol->migr_state = 1;
2229			mvol->migr_type = INTEL_MT_REBUILD;
2230		} else if (state == G_RAID_SUBDISK_S_RESYNC) {
2231			mvol->migr_state = 1;
2232			/* mvol->migr_type = INTEL_MT_REPAIR; */
2233			mvol->migr_type = INTEL_MT_VERIFY;
2234			mvol->state |= INTEL_ST_VERIFY_AND_FIX;
2235		} else
2236			mvol->migr_state = 0;
2237		mvol->dirty = (vol->v_dirty || stale);
2238
2239		mmap0 = intel_get_map(mvol, 0);
2240
2241		/* Write map / common part of two maps. */
2242		intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
2243		intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
2244		mmap0->strip_sectors = vol->v_strip_size / sectorsize;
2245		if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
2246			mmap0->status = INTEL_S_FAILURE;
2247		else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
2248			mmap0->status = INTEL_S_DEGRADED;
2249		else
2250			mmap0->status = INTEL_S_READY;
2251		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2252			mmap0->type = INTEL_T_RAID0;
2253		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
2254		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2255			mmap0->type = INTEL_T_RAID1;
2256		else
2257			mmap0->type = INTEL_T_RAID5;
2258		mmap0->total_disks = vol->v_disks_count;
2259		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2260			mmap0->total_domains = vol->v_disks_count;
2261		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2262			mmap0->total_domains = 2;
2263		else
2264			mmap0->total_domains = 1;
2265		intel_set_map_stripe_count(mmap0,
2266		    sd->sd_size / vol->v_strip_size / mmap0->total_domains);
2267		mmap0->failed_disk_num = 0xff;
2268		mmap0->ddf = 1;
2269
2270		/* If there are two maps - copy common and update. */
2271		if (mvol->migr_state) {
2272			intel_set_vol_curr_migr_unit(mvol,
2273			    pos / vol->v_strip_size / mmap0->total_domains);
2274			mmap1 = intel_get_map(mvol, 1);
2275			memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
2276			mmap0->status = INTEL_S_READY;
2277		} else
2278			mmap1 = NULL;
2279
2280		/* Write disk indexes and put rebuild flags. */
2281		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2282			sd = &vol->v_subdisks[sdi];
2283			pd = (struct g_raid_md_intel_perdisk *)
2284			    sd->sd_disk->d_md_data;
2285			mmap0->disk_idx[sdi] = pd->pd_disk_pos;
2286			if (mvol->migr_state)
2287				mmap1->disk_idx[sdi] = pd->pd_disk_pos;
2288			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2289			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2290				mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2291			} else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
2292			    sd->sd_state != G_RAID_SUBDISK_S_STALE) {
2293				mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
2294				if (mvol->migr_state)
2295					mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2296			}
2297			if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
2298			     sd->sd_state == G_RAID_SUBDISK_S_FAILED) &&
2299			    mmap0->failed_disk_num == 0xff) {
2300				mmap0->failed_disk_num = sdi;
2301				if (mvol->migr_state)
2302					mmap1->failed_disk_num = sdi;
2303			}
2304		}
2305		vi++;
2306	}
2307	meta->total_volumes = vi;
2308	if (strcmp(version, INTEL_VERSION_1300) != 0)
2309		meta->attributes &= INTEL_ATTR_CHECKSUM;
2310	memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
2311
2312	/* We are done. Print meta data and store them to disks. */
2313	g_raid_md_intel_print(meta);
2314	if (mdi->mdio_meta != NULL)
2315		free(mdi->mdio_meta, M_MD_INTEL);
2316	mdi->mdio_meta = meta;
2317	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2318		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2319		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
2320			continue;
2321		if (pd->pd_meta != NULL) {
2322			free(pd->pd_meta, M_MD_INTEL);
2323			pd->pd_meta = NULL;
2324		}
2325		pd->pd_meta = intel_meta_copy(meta);
2326		intel_meta_write(disk->d_consumer, meta);
2327	}
2328	return (0);
2329}
2330
2331static int
2332g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
2333    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2334{
2335	struct g_raid_softc *sc;
2336	struct g_raid_md_intel_object *mdi;
2337	struct g_raid_md_intel_perdisk *pd;
2338	struct g_raid_subdisk *sd;
2339
2340	sc = md->mdo_softc;
2341	mdi = (struct g_raid_md_intel_object *)md;
2342	pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
2343
2344	/* We can't fail disk that is not a part of array now. */
2345	if (pd->pd_disk_pos < 0)
2346		return (-1);
2347
2348	/*
2349	 * Mark disk as failed in metadata and try to write that metadata
2350	 * to the disk itself to prevent it's later resurrection as STALE.
2351	 */
2352	mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
2353	pd->pd_disk_meta.flags = INTEL_F_FAILED;
2354	g_raid_md_intel_print(mdi->mdio_meta);
2355	if (tdisk->d_consumer)
2356		intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
2357
2358	/* Change states. */
2359	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
2360	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
2361		g_raid_change_subdisk_state(sd,
2362		    G_RAID_SUBDISK_S_FAILED);
2363		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
2364		    G_RAID_EVENT_SUBDISK);
2365	}
2366
2367	/* Write updated metadata to remaining disks. */
2368	g_raid_md_write_intel(md, NULL, NULL, tdisk);
2369
2370	/* Check if anything left except placeholders. */
2371	if (g_raid_ndisks(sc, -1) ==
2372	    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2373		g_raid_destroy_node(sc, 0);
2374	else
2375		g_raid_md_intel_refill(sc);
2376	return (0);
2377}
2378
2379static int
2380g_raid_md_free_disk_intel(struct g_raid_md_object *md,
2381    struct g_raid_disk *disk)
2382{
2383	struct g_raid_md_intel_perdisk *pd;
2384
2385	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2386	if (pd->pd_meta != NULL) {
2387		free(pd->pd_meta, M_MD_INTEL);
2388		pd->pd_meta = NULL;
2389	}
2390	free(pd, M_MD_INTEL);
2391	disk->d_md_data = NULL;
2392	return (0);
2393}
2394
2395static int
2396g_raid_md_free_intel(struct g_raid_md_object *md)
2397{
2398	struct g_raid_md_intel_object *mdi;
2399
2400	mdi = (struct g_raid_md_intel_object *)md;
2401	if (!mdi->mdio_started) {
2402		mdi->mdio_started = 0;
2403		callout_stop(&mdi->mdio_start_co);
2404		G_RAID_DEBUG1(1, md->mdo_softc,
2405		    "root_mount_rel %p", mdi->mdio_rootmount);
2406		root_mount_rel(mdi->mdio_rootmount);
2407		mdi->mdio_rootmount = NULL;
2408	}
2409	if (mdi->mdio_meta != NULL) {
2410		free(mdi->mdio_meta, M_MD_INTEL);
2411		mdi->mdio_meta = NULL;
2412	}
2413	return (0);
2414}
2415
/*
 * Declare the Intel metadata module through the g_raid framework macro.
 * NOTE(review): presumably this registers the g_raid_md_intel class with
 * GEOM RAID -- confirm against the G_RAID_MD_DECLARE definition in g_raid.h.
 */
2416G_RAID_MD_DECLARE(g_raid_md_intel);
2417