1/*-
2 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: releng/11.0/sys/geom/part/g_part_ldm.c 298808 2016-04-29 20:56:58Z pfg $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/diskmbr.h>
33#include <sys/endian.h>
34#include <sys/gpt.h>
35#include <sys/kernel.h>
36#include <sys/kobj.h>
37#include <sys/limits.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mutex.h>
41#include <sys/queue.h>
42#include <sys/sbuf.h>
43#include <sys/systm.h>
44#include <sys/sysctl.h>
45#include <sys/uuid.h>
46#include <geom/geom.h>
47#include <geom/part/g_part.h>
48
49#include "g_part_if.h"
50
/* Register the "geom_part_ldm" kernel feature string. */
FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");

/* Sysctl node "ldm", created below the _kern_geom_part node. */
SYSCTL_DECL(_kern_geom_part);
static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
    "GEOM_PART_LDM Logical Disk Manager");

/*
 * Debug verbosity. LDM_DEBUG() prints a message when this value is at
 * least the message level; LDM_DUMP() prints a hexdump when it exceeds 1.
 */
static u_int ldm_debug = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
    CTLFLAG_RWTUN, &ldm_debug, 0, "Debug level");
60
/*
 * This allows access to mirrored LDM volumes. Since we do not do
 * any mirroring here, it is not enabled by default.
 */
static u_int show_mirrors = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
    CTLFLAG_RWTUN, &show_mirrors, 0, "Show mirrored volumes");
68
/*
 * Print a "GEOM_PART: "-prefixed message followed by a newline when the
 * ldm_debug level is at least 'lvl'. At least one argument must follow
 * 'fmt', because __VA_ARGS__ is placed after a comma.
 */
#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
	if (ldm_debug >= (lvl)) {					\
		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
	}								\
} while (0)
/* Hexdump 'size' bytes of 'buf' when ldm_debug is greater than 1. */
#define	LDM_DUMP(buf, size)	do {					\
	if (ldm_debug > 1) {						\
		hexdump(buf, size, NULL, 0);				\
	}								\
} while (0)
79
/*
 * These are the internal representations of the LDM structures.
 *
 * We do not keep all fields of the on-disk structures, only the most
 * useful ones. All numbers in the on-disk structures are stored in
 * big-endian format.
 */
86
/*
 * The private header is 512 bytes long. There are three copies on each
 * disk. Offsets and sizes are in sectors. Location of each copy:
 * - the first offset is relative to the disk start;
 * - the second and third offsets are relative to the LDM database start.
 *
 * On a disk partitioned with GPT, the LDM does not have the first
 * private header.
 */
#define	LDM_PH_MBRINDEX		0
#define	LDM_PH_GPTINDEX		2
/* Sector offsets of the three private header copies (see above). */
static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
/* Private header versions accepted by ldm_privhdr_parse(). */
#define	LDM_VERSION_2K		0x2000b
#define	LDM_VERSION_VISTA	0x2000c
/* Byte offsets of the fields inside the on-disk private header. */
#define	LDM_PH_VERSION_OFF	0x00c
#define	LDM_PH_DISKGUID_OFF	0x030
#define	LDM_PH_DGGUID_OFF	0x0b0
#define	LDM_PH_DGNAME_OFF	0x0f0
#define	LDM_PH_START_OFF	0x11b
#define	LDM_PH_SIZE_OFF		0x123
#define	LDM_PH_DB_OFF		0x12b
#define	LDM_PH_DBSIZE_OFF	0x133
#define	LDM_PH_TH1_OFF		0x13b
#define	LDM_PH_TH2_OFF		0x143
#define	LDM_PH_CONFSIZE_OFF	0x153
#define	LDM_PH_LOGSIZE_OFF	0x15b
/* Signature expected at the very beginning of a private header sector. */
#define	LDM_PH_SIGN		"PRIVHEAD"
/* In-memory copy of the private header fields that this code uses. */
struct ldm_privhdr {
	struct uuid	disk_guid;
	struct uuid	dg_guid;
	u_char		dg_name[32];
	uint64_t	start;		/* logical disk start */
	uint64_t	size;		/* logical disk size */
	uint64_t	db_offset;	/* LDM database start */
#define	LDM_DB_SIZE		2048
	uint64_t	db_size;	/* LDM database size */
#define	LDM_TH_COUNT		2
	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
	uint64_t	conf_size;	/* configuration size */
	uint64_t	log_size;	/* size of log */
};
127
/*
 * The table of contents header is 512 bytes long.
 * There are two identical copies at the offsets given in the private
 * header. These offsets are relative to the LDM database start.
 */
#define	LDM_TH_SIGN		"TOCBLOCK"
/* Names of the two TOC entries checked by ldm_tochdr_check(). */
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields inside the on-disk TOC header. */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
/* In-memory copy of the TOC header fields that this code uses. */
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};
146
/*
 * The LDM database (VMDB) header is 512 bytes long.
 */
#define	LDM_VMDB_SIGN		"VMDB"
/* Byte offsets of the fields inside the on-disk VMDB header. */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
/* In-memory copy of the VMDB header fields that this code uses. */
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
161
/*
 * The LDM database configuration section contains the VMDB header and
 * many VBLKs. Each VBLK represents a disk group, disk, partition,
 * component or volume.
 *
 * The most interesting for us are volumes: they represent partitions
 * in the GEOM_PART meaning. But a volume VBLK does not contain all the
 * information needed to create a GEOM provider, so we have to get this
 * information from the related VBLKs. This is how VBLKs are related:
 *	Volumes <- Components <- Partitions -> Disks
 *
 * One volume can contain several components. In this case LDM
 * mirrors the volume data to each component.
 *
 * Also, each component can contain several partitions (spanned or
 * striped volumes).
 */
180
/* Component object: belongs to one volume and owns a list of partitions. */
struct ldm_component {
	uint64_t	id;		/* object id */
	uint64_t	vol_id;		/* parent volume object id */

	int		count;
	LIST_HEAD(, ldm_partition) partitions;
	LIST_ENTRY(ldm_component) entry;
};
189
/* Volume object: owns a list of components. */
struct ldm_volume {
	uint64_t	id;		/* object id */
	uint64_t	size;		/* volume size */
	uint8_t		number;		/* used for ordering */
	uint8_t		part_type;	/* partition type */

	int		count;
	LIST_HEAD(, ldm_component) components;
	LIST_ENTRY(ldm_volume)	entry;
};
200
/* Disk object: maps an object id to the GUID of a disk. */
struct ldm_disk {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk guid */

	LIST_ENTRY(ldm_disk) entry;
};
207
/* Disk group object; compiled out, disk group VBLKs are not stored. */
#if 0
struct ldm_disk_group {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk group guid */
	u_char		name[32];	/* disk group name */

	LIST_ENTRY(ldm_disk_group) entry;
};
#endif
217
/* Partition object: a piece of a disk that belongs to a component. */
struct ldm_partition {
	uint64_t	id;		/* object id */
	uint64_t	disk_id;	/* disk object id */
	uint64_t	comp_id;	/* parent component object id */
	uint64_t	start;		/* offset relative to disk start */
	uint64_t	offset;		/* offset for spanned volumes */
	uint64_t	size;		/* partition size */

	LIST_ENTRY(ldm_partition) entry;
};
228
/*
 * Each VBLK is 128 bytes long and has a standard 16-byte header.
 * Some of the VBLK fields have a fixed size, but others have a variable
 * size. Fields with a variable size are prefixed with a one-byte length
 * marker. Some fields are strings, which can also be fixed-size or
 * variable-size. Fixed-size strings are NUL-terminated, others are not.
 * All VBLKs start with the same several fields:
 *	Offset		Size		Description
 *	---------------+---------------+--------------------------
 *	0x00		16		standard VBLK header
 *	0x10		2		update status
 *	0x13		1		VBLK type
 *	0x18		PS		object id
 *	0x18+		PN		object name
 *
 *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
 *  o 'P' in size column means 'prefixed' (variable-width),
 *    'S' - string, 'N' - number.
 */
#define	LDM_VBLK_SIGN		"VBLK"
/* Byte offsets of the fields inside an on-disk VBLK. */
#define	LDM_VBLK_SEQ_OFF	0x04
#define	LDM_VBLK_GROUP_OFF	0x08
#define	LDM_VBLK_INDEX_OFF	0x0c
#define	LDM_VBLK_COUNT_OFF	0x0e
#define	LDM_VBLK_TYPE_OFF	0x13
#define	LDM_VBLK_OID_OFF	0x18
/* Decoded fields of the standard VBLK header. */
struct ldm_vblkhdr {
	uint32_t	seq;		/* sequence number */
	uint32_t	group;		/* group number */
	uint16_t	index;		/* index in the group */
	uint16_t	count;		/* number of entries in the group */
};
261
/* VBLK type codes handled by ldm_vblk_handle(). */
#define	LDM_VBLK_T_COMPONENT	0x32
#define	LDM_VBLK_T_PARTITION	0x33
#define	LDM_VBLK_T_DISK		0x34
#define	LDM_VBLK_T_DISKGROUP	0x35
#define	LDM_VBLK_T_DISK4	0x44
#define	LDM_VBLK_T_DISKGROUP4	0x45
#define	LDM_VBLK_T_VOLUME	0x51
/*
 * One parsed VBLK. Every structure in the union begins with a uint64_t
 * object id, so 'u.id' overlays the 'id' member of whichever object the
 * 'type' field selects.
 */
struct ldm_vblk {
	uint8_t		type;		/* VBLK type */
	union {
		uint64_t		id;
		struct ldm_volume	vol;
		struct ldm_component	comp;
		struct ldm_disk		disk;
		struct ldm_partition	part;
#if 0
		struct ldm_disk_group	disk_group;
#endif
	} u;
	LIST_ENTRY(ldm_vblk) entry;
};
283
/*
 * Some VBLKs contain a bit more data than can fit into 128 bytes. These
 * VBLKs are called eXtended VBLKs (xVBLK). Before parsing, the data from
 * all parts of such a VBLK should be placed into one contiguous memory
 * buffer. An xVBLK is recognized by the count field in the standard VBLK
 * header (count > 1).
 */
struct ldm_xvblk {
	uint32_t	group;		/* xVBLK group number */
	uint32_t	size;		/* the total size of xVBLK */
	uint8_t		map;		/* bitmask of currently saved VBLKs */
	u_char		*data;		/* xVBLK data */

	LIST_ENTRY(ldm_xvblk)	entry;
};
298
/*
 * The internal representation of the LDM database: the three validated
 * headers plus the lists of objects collected while parsing VBLKs.
 */
struct ldm_db {
	struct ldm_privhdr		ph;	/* private header */
	struct ldm_tochdr		th;	/* TOC header */
	struct ldm_vmdbhdr		dh;	/* VMDB header */

	LIST_HEAD(, ldm_volume)		volumes;
	LIST_HEAD(, ldm_disk)		disks;
	LIST_HEAD(, ldm_vblk)		vblks;
	LIST_HEAD(, ldm_xvblk)		xvblks;
};
310
/* GPT partition type UUID of the MS LDM metadata partition. */
static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;

/* Scheme-private table; 'base' comes first so it extends g_part_table. */
struct g_part_ldm_table {
	struct g_part_table	base;
	uint64_t		db_offset;
	int			is_gpt;
};
/* Scheme-private entry; 'base' comes first so it extends g_part_entry. */
struct g_part_ldm_entry {
	struct g_part_entry	base;
	uint8_t			type;
};
322
/* Forward declarations of the g_part method implementations. */
static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
    struct sbuf *, const char *);
static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
340
/* kobj method table binding the g_part interface to this scheme. */
static kobj_method_t g_part_ldm_methods[] = {
	KOBJMETHOD(g_part_add,		g_part_ldm_add),
	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
	KOBJMETHOD(g_part_create,	g_part_ldm_create),
	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
	KOBJMETHOD(g_part_name,		g_part_ldm_name),
	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
	KOBJMETHOD(g_part_read,		g_part_ldm_read),
	KOBJMETHOD(g_part_type,		g_part_ldm_type),
	KOBJMETHOD(g_part_write,	g_part_ldm_write),
	{ 0, 0 }	/* end-of-table marker */
};
356
/*
 * Scheme descriptor: the name "LDM", the method table above and the sizes
 * of the scheme-private table and entry structures.
 */
static struct g_part_scheme g_part_ldm_scheme = {
	"LDM",
	g_part_ldm_methods,
	sizeof(struct g_part_ldm_table),
	.gps_entrysz = sizeof(struct g_part_ldm_entry)
};
G_PART_SCHEME_DECLARE(g_part_ldm);
364
/* Pairs of a DOS (MBR) partition type byte and a G_PART_ALIAS_* value. */
static struct g_part_ldm_alias {
	u_char		typ;	/* DOSPTYP_* partition type */
	int		alias;	/* matching G_PART_ALIAS_* value */
} ldm_alias_match[] = {
	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
};
378
379static u_char*
380ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
381{
382	struct g_provider *pp;
383	u_char *buf;
384
385	pp = cp->provider;
386	buf = g_read_data(cp, off, pp->sectorsize, error);
387	if (buf == NULL)
388		return (NULL);
389
390	if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) {
391		LDM_DEBUG(1, "%s: invalid LDM private header signature",
392		    pp->name);
393		g_free(buf);
394		buf = NULL;
395		*error = EINVAL;
396	}
397	return (buf);
398}
399
400static int
401ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
402    const u_char *buf)
403{
404	uint32_t version;
405	int error;
406
407	memset(hdr, 0, sizeof(*hdr));
408	version = be32dec(buf + LDM_PH_VERSION_OFF);
409	if (version != LDM_VERSION_2K &&
410	    version != LDM_VERSION_VISTA) {
411		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
412		    cp->provider->name, version >> 16,
413		    version & 0xFFFF);
414		return (ENXIO);
415	}
416	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
417	if (error != 0)
418		return (error);
419	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
420	if (error != 0)
421		return (error);
422	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
423	hdr->start = be64dec(buf + LDM_PH_START_OFF);
424	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
425	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
426	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
427	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
428	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
429	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
430	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
431	return (0);
432}
433
434static int
435ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
436{
437	struct g_consumer *cp2;
438	struct g_provider *pp;
439	struct ldm_privhdr hdr;
440	uint64_t offset, last;
441	int error, found, i;
442	u_char *buf;
443
444	pp = cp->provider;
445	if (is_gpt) {
446		/*
447		 * The last LBA is used in several checks below, for the
448		 * GPT case it should be calculated relative to the whole
449		 * disk.
450		 */
451		cp2 = LIST_FIRST(&pp->geom->consumer);
452		last =
453		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
454	} else
455		last = pp->mediasize / pp->sectorsize - 1;
456	for (found = 0, i = is_gpt; i < nitems(ldm_ph_off); i++) {
457		offset = ldm_ph_off[i];
458		/*
459		 * In the GPT case consumer is attached to the LDM metadata
460		 * partition and we don't need add db_offset.
461		 */
462		if (!is_gpt)
463			offset += db->ph.db_offset;
464		if (i == LDM_PH_MBRINDEX) {
465			/*
466			 * Prepare to errors and setup new base offset
467			 * to read backup private headers. Assume that LDM
468			 * database is in the last 1Mbyte area.
469			 */
470			db->ph.db_offset = last - LDM_DB_SIZE;
471		}
472		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
473		if (buf == NULL) {
474			LDM_DEBUG(1, "%s: failed to read private header "
475			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
476			continue;
477		}
478		error = ldm_privhdr_parse(cp, &hdr, buf);
479		if (error != 0) {
480			LDM_DEBUG(1, "%s: failed to parse private "
481			    "header %d", pp->name, i);
482			LDM_DUMP(buf, pp->sectorsize);
483			g_free(buf);
484			continue;
485		}
486		g_free(buf);
487		if (hdr.start > last ||
488		    hdr.start + hdr.size - 1 > last ||
489		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
490		    hdr.db_size != LDM_DB_SIZE ||
491		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
492		    hdr.th_offset[0] >= LDM_DB_SIZE ||
493		    hdr.th_offset[1] >= LDM_DB_SIZE ||
494		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
495			LDM_DEBUG(1, "%s: invalid values in the "
496			    "private header %d", pp->name, i);
497			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
498			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
499			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
500			    "last: %jd", pp->name, hdr.start, hdr.size,
501			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
502			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
503			    last);
504			continue;
505		}
506		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
507			LDM_DEBUG(0, "%s: private headers are not equal",
508			    pp->name);
509			if (i > 1) {
510				/*
511				 * We have different headers in the LDM.
512				 * We can not trust this metadata.
513				 */
514				LDM_DEBUG(0, "%s: refuse LDM metadata",
515				    pp->name);
516				return (EINVAL);
517			}
518			/*
519			 * We already have read primary private header
520			 * and it differs from this backup one.
521			 * Prefer the backup header and save it.
522			 */
523			found = 0;
524		}
525		if (found == 0)
526			memcpy(&db->ph, &hdr, sizeof(hdr));
527		found = 1;
528	}
529	if (found == 0) {
530		LDM_DEBUG(1, "%s: valid LDM private header not found",
531		    pp->name);
532		return (ENXIO);
533	}
534	return (0);
535}
536
537static int
538ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
539{
540	struct g_part_table *gpt;
541	struct g_part_entry *e;
542	struct g_consumer *cp2;
543	int error;
544
545	cp2 = LIST_NEXT(cp, consumer);
546	g_topology_lock();
547	gpt = cp->provider->geom->softc;
548	error = 0;
549	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
550		if (cp->provider == e->gpe_pp) {
551			/* ms-ldm-metadata partition */
552			if (e->gpe_start != db->ph.db_offset ||
553			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
554				error++;
555		} else if (cp2->provider == e->gpe_pp) {
556			/* ms-ldm-data partition */
557			if (e->gpe_start != db->ph.start ||
558			    e->gpe_end != db->ph.start + db->ph.size - 1)
559				error++;
560		}
561		if (error != 0) {
562			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
563			    "do not match with the LDM metadata",
564			    e->gpe_pp->name, e->gpe_index);
565			error = ENXIO;
566			break;
567		}
568	}
569	g_topology_unlock();
570	return (error);
571}
572
573static int
574ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
575{
576	struct g_provider *pp;
577	struct ldm_tochdr hdr;
578	uint64_t offset, conf_size, log_size;
579	int error, found, i;
580	u_char *buf;
581
582	pp = cp->provider;
583	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
584		offset = db->ph.db_offset + db->ph.th_offset[i];
585		buf = g_read_data(cp,
586		    offset * pp->sectorsize, pp->sectorsize, &error);
587		if (buf == NULL) {
588			LDM_DEBUG(1, "%s: failed to read TOC header "
589			    "at LBA %ju", pp->name, (uintmax_t)offset);
590			continue;
591		}
592		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
593		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
594		    strlen(LDM_TH_NAME1)) != 0 ||
595		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
596		    strlen(LDM_TH_NAME2)) != 0) {
597			LDM_DEBUG(1, "%s: failed to parse TOC header "
598			    "at LBA %ju", pp->name, (uintmax_t)offset);
599			LDM_DUMP(buf, pp->sectorsize);
600			g_free(buf);
601			continue;
602		}
603		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
604		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
605		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
606		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
607		if (conf_size != db->ph.conf_size ||
608		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
609		    log_size != db->ph.log_size ||
610		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
611			LDM_DEBUG(1, "%s: invalid values in the "
612			    "TOC header at LBA %ju", pp->name,
613			    (uintmax_t)offset);
614			LDM_DUMP(buf, pp->sectorsize);
615			g_free(buf);
616			continue;
617		}
618		g_free(buf);
619		if (found == 0)
620			memcpy(&db->th, &hdr, sizeof(hdr));
621		found = 1;
622	}
623	if (found == 0) {
624		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
625		    pp->name);
626		return (ENXIO);
627	}
628	return (0);
629}
630
631static int
632ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
633{
634	struct g_provider *pp;
635	struct uuid dg_guid;
636	uint64_t offset;
637	uint32_t version;
638	int error;
639	u_char *buf;
640
641	pp = cp->provider;
642	offset = db->ph.db_offset + db->th.conf_offset;
643	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
644	    &error);
645	if (buf == NULL) {
646		LDM_DEBUG(0, "%s: failed to read VMDB header at "
647		    "LBA %ju", pp->name, (uintmax_t)offset);
648		return (error);
649	}
650	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
651		g_free(buf);
652		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
653		    "LBA %ju", pp->name, (uintmax_t)offset);
654		return (ENXIO);
655	}
656	/* Check version. */
657	version = be32dec(buf + LDM_DB_VERSION_OFF);
658	if (version != 0x4000A) {
659		g_free(buf);
660		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
661		    pp->name, version >> 16, version & 0xFFFF);
662		return (ENXIO);
663	}
664	/*
665	 * Check VMDB update status:
666	 *	1 - in a consistent state;
667	 *	2 - in a creation phase;
668	 *	3 - in a deletion phase;
669	 */
670	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
671		g_free(buf);
672		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
673		    pp->name);
674		return (ENXIO);
675	}
676	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
677	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
678	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
679	/* Compare disk group name and guid from VMDB and private headers */
680	if (error != 0 || db->dh.size == 0 ||
681	    pp->sectorsize % db->dh.size != 0 ||
682	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
683	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
684	    db->dh.size * db->dh.last_seq >
685	    db->ph.conf_size * pp->sectorsize) {
686		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
687		    pp->name);
688		LDM_DUMP(buf, pp->sectorsize);
689		g_free(buf);
690		return (EINVAL);
691	}
692	g_free(buf);
693	return (0);
694}
695
696static int
697ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
698{
699	struct ldm_xvblk *blk;
700	size_t size;
701
702	size = db->dh.size - 16;
703	LIST_FOREACH(blk, &db->xvblks, entry)
704		if (blk->group == vh->group)
705			break;
706	if (blk == NULL) {
707		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
708		blk->group = vh->group;
709		blk->size = size * vh->count + 16;
710		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
711		blk->map = 0xFF << vh->count;
712		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
713	}
714	if ((blk->map & (1 << vh->index)) != 0) {
715		/* Block with given index has been already saved. */
716		return (EINVAL);
717	}
718	/* Copy the data block to the place related to index. */
719	memcpy(blk->data + size * vh->index + 16, p + 16, size);
720	blk->map |= 1 << vh->index;
721	return (0);
722}
723
/*
 * Read the variable-width numeric field and return new offset.
 *
 * The field is a one-byte length followed by that many bytes holding a
 * big-endian number. 'range' is the size of 'buf'.
 *
 * Returns the offset of the byte following the field, or -1 when
 * 'offset' lies outside the buffer, when the length exceeds 8 bytes, or
 * when the field does not end before the last byte of the buffer.
 * '*result' is written only on success.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}
739
/*
 * Read the variable-width string and return new offset.
 *
 * The field is a one-byte length followed by that many characters with
 * no terminator. The characters are copied to 'result' and a NUL is
 * appended, so 'result' must have room for 'maxlen' bytes. 'range' is
 * the size of 'buf'.
 *
 * Returns the offset of the byte following the field, or -1 when
 * 'offset' lies outside the buffer, when the string does not fit into
 * 'result', or when the field does not end before the last byte of the
 * buffer.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}
754
/*
 * Just skip the variable-width field and return new offset.
 *
 * The field is a one-byte length followed by that many bytes. 'range' is
 * the size of 'buf'.
 *
 * Returns the offset of the byte following the field, or -1 when
 * 'offset' lies outside the buffer or when the field does not end before
 * the last byte of the buffer.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
767
768static int
769ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
770{
771	struct ldm_vblk *blk;
772	struct ldm_volume *volume, *last;
773	const char *errstr;
774	u_char vstr[64];
775	int error, offset;
776
777	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
778	blk->type = p[LDM_VBLK_TYPE_OFF];
779	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
780	if (offset < 0) {
781		errstr = "object id";
782		goto fail;
783	}
784	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
785	if (offset < 0) {
786		errstr = "object name";
787		goto fail;
788	}
789	switch (blk->type) {
790	/*
791	 * Component VBLK fields:
792	 * Offset	Size	Description
793	 * ------------+-------+------------------------
794	 *  0x18+	PS	volume state
795	 *  0x18+5	PN	component children count
796	 *  0x1D+16	PN	parent's volume object id
797	 *  0x2D+1	PN	stripe size
798	 */
799	case LDM_VBLK_T_COMPONENT:
800		offset = ldm_vparm_skip(p, offset, size);
801		if (offset < 0) {
802			errstr = "volume state";
803			goto fail;
804		}
805		offset = ldm_vparm_skip(p, offset + 5, size);
806		if (offset < 0) {
807			errstr = "children count";
808			goto fail;
809		}
810		offset = ldm_vnum_get(p, offset + 16,
811		    &blk->u.comp.vol_id, size);
812		if (offset < 0) {
813			errstr = "volume id";
814			goto fail;
815		}
816		break;
817	/*
818	 * Partition VBLK fields:
819	 * Offset	Size	Description
820	 * ------------+-------+------------------------
821	 *  0x18+12	8	partition start offset
822	 *  0x18+20	8	volume offset
823	 *  0x18+28	PN	partition size
824	 *  0x34+	PN	parent's component object id
825	 *  0x34+	PN	disk's object id
826	 */
827	case LDM_VBLK_T_PARTITION:
828		if (offset + 28 >= size) {
829			errstr = "too small buffer";
830			goto fail;
831		}
832		blk->u.part.start = be64dec(p + offset + 12);
833		blk->u.part.offset = be64dec(p + offset + 20);
834		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
835		if (offset < 0) {
836			errstr = "partition size";
837			goto fail;
838		}
839		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
840		if (offset < 0) {
841			errstr = "component id";
842			goto fail;
843		}
844		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
845		if (offset < 0) {
846			errstr = "disk id";
847			goto fail;
848		}
849		break;
850	/*
851	 * Disk VBLK fields:
852	 * Offset	Size	Description
853	 * ------------+-------+------------------------
854	 *  0x18+	PS	disk GUID
855	 */
856	case LDM_VBLK_T_DISK:
857		errstr = "disk guid";
858		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
859		if (offset < 0)
860			goto fail;
861		error = parse_uuid(vstr, &blk->u.disk.guid);
862		if (error != 0)
863			goto fail;
864		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
865		break;
866	/*
867	 * Disk group VBLK fields:
868	 * Offset	Size	Description
869	 * ------------+-------+------------------------
870	 *  0x18+	PS	disk group GUID
871	 */
872	case LDM_VBLK_T_DISKGROUP:
873#if 0
874		strncpy(blk->u.disk_group.name, vstr,
875		    sizeof(blk->u.disk_group.name));
876		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
877		if (offset < 0) {
878			errstr = "disk group guid";
879			goto fail;
880		}
881		error = parse_uuid(name, &blk->u.disk_group.guid);
882		if (error != 0) {
883			errstr = "disk group guid";
884			goto fail;
885		}
886		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
887#endif
888		break;
889	/*
890	 * Disk VBLK fields:
891	 * Offset	Size	Description
892	 * ------------+-------+------------------------
893	 *  0x18+	16	disk GUID
894	 */
895	case LDM_VBLK_T_DISK4:
896		be_uuid_dec(p + offset, &blk->u.disk.guid);
897		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
898		break;
899	/*
900	 * Disk group VBLK fields:
901	 * Offset	Size	Description
902	 * ------------+-------+------------------------
903	 *  0x18+	16	disk GUID
904	 */
905	case LDM_VBLK_T_DISKGROUP4:
906#if 0
907		strncpy(blk->u.disk_group.name, vstr,
908		    sizeof(blk->u.disk_group.name));
909		be_uuid_dec(p + offset, &blk->u.disk.guid);
910		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
911#endif
912		break;
913	/*
914	 * Volume VBLK fields:
915	 * Offset	Size	Description
916	 * ------------+-------+------------------------
917	 *  0x18+	PS	volume type
918	 *  0x18+	PS	unknown
919	 *  0x18+	14(S)	volume state
920	 *  0x18+16	1	volume number
921	 *  0x18+21	PN	volume children count
922	 *  0x2D+16	PN	volume size
923	 *  0x3D+4	1	partition type
924	 */
925	case LDM_VBLK_T_VOLUME:
926		offset = ldm_vparm_skip(p, offset, size);
927		if (offset < 0) {
928			errstr = "volume type";
929			goto fail;
930		}
931		offset = ldm_vparm_skip(p, offset, size);
932		if (offset < 0) {
933			errstr = "unknown param";
934			goto fail;
935		}
936		if (offset + 21 >= size) {
937			errstr = "too small buffer";
938			goto fail;
939		}
940		blk->u.vol.number = p[offset + 16];
941		offset = ldm_vparm_skip(p, offset + 21, size);
942		if (offset < 0) {
943			errstr = "children count";
944			goto fail;
945		}
946		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
947		if (offset < 0) {
948			errstr = "volume size";
949			goto fail;
950		}
951		if (offset + 4 >= size) {
952			errstr = "too small buffer";
953			goto fail;
954		}
955		blk->u.vol.part_type = p[offset + 4];
956		/* keep volumes ordered by volume number */
957		last = NULL;
958		LIST_FOREACH(volume, &db->volumes, entry) {
959			if (volume->number > blk->u.vol.number)
960				break;
961			last = volume;
962		}
963		if (last != NULL)
964			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
965		else
966			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
967		break;
968	default:
969		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
970		LDM_DUMP(p, size);
971	}
972	LIST_INSERT_HEAD(&db->vblks, blk, entry);
973	return (0);
974fail:
975	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
976	    errstr, blk->type);
977	LDM_DUMP(p, size);
978	g_free(blk);
979	return (EINVAL);
980}
981
982static void
983ldm_vmdb_free(struct ldm_db *db)
984{
985	struct ldm_vblk *vblk;
986	struct ldm_xvblk *xvblk;
987
988	while (!LIST_EMPTY(&db->xvblks)) {
989		xvblk = LIST_FIRST(&db->xvblks);
990		LIST_REMOVE(xvblk, entry);
991		g_free(xvblk->data);
992		g_free(xvblk);
993	}
994	while (!LIST_EMPTY(&db->vblks)) {
995		vblk = LIST_FIRST(&db->vblks);
996		LIST_REMOVE(vblk, entry);
997		g_free(vblk);
998	}
999}
1000
1001static int
1002ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
1003{
1004	struct g_provider *pp;
1005	struct ldm_vblk *vblk;
1006	struct ldm_xvblk *xvblk;
1007	struct ldm_volume *volume;
1008	struct ldm_component *comp;
1009	struct ldm_vblkhdr vh;
1010	u_char *buf, *p;
1011	size_t size, n, sectors;
1012	uint64_t offset;
1013	int error;
1014
1015	pp = cp->provider;
1016	size = howmany(db->dh.last_seq * db->dh.size, pp->sectorsize);
1017	size -= 1; /* one sector takes vmdb header */
1018	for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) {
1019		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
1020		sectors = (size - n) > (MAXPHYS / pp->sectorsize) ?
1021		    MAXPHYS / pp->sectorsize: size - n;
1022		/* read VBLKs */
1023		buf = g_read_data(cp, offset * pp->sectorsize,
1024		    sectors * pp->sectorsize, &error);
1025		if (buf == NULL) {
1026			LDM_DEBUG(0, "%s: failed to read VBLK\n",
1027			    pp->name);
1028			goto fail;
1029		}
1030		for (p = buf; p < buf + sectors * pp->sectorsize;
1031		    p += db->dh.size) {
1032			if (memcmp(p, LDM_VBLK_SIGN,
1033			    strlen(LDM_VBLK_SIGN)) != 0) {
1034				LDM_DEBUG(0, "%s: no VBLK signature\n",
1035				    pp->name);
1036				LDM_DUMP(p, db->dh.size);
1037				goto fail;
1038			}
1039			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
1040			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
1041			/* skip empty blocks */
1042			if (vh.seq == 0 || vh.group == 0)
1043				continue;
1044			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
1045			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
1046			if (vh.count == 0 || vh.count > 4 ||
1047			    vh.seq > db->dh.last_seq) {
1048				LDM_DEBUG(0, "%s: invalid values "
1049				    "in the VBLK header\n", pp->name);
1050				LDM_DUMP(p, db->dh.size);
1051				goto fail;
1052			}
1053			if (vh.count > 1) {
1054				error = ldm_xvblk_handle(db, &vh, p);
1055				if (error != 0) {
1056					LDM_DEBUG(0, "%s: xVBLK "
1057					    "is corrupted\n", pp->name);
1058					LDM_DUMP(p, db->dh.size);
1059					goto fail;
1060				}
1061				continue;
1062			}
1063			if (be16dec(p + 16) != 0)
1064				LDM_DEBUG(1, "%s: VBLK update"
1065				    " status is %u\n", pp->name,
1066				    be16dec(p + 16));
1067			error = ldm_vblk_handle(db, p, db->dh.size);
1068			if (error != 0)
1069				goto fail;
1070		}
1071		g_free(buf);
1072		buf = NULL;
1073	}
1074	/* Parse xVBLKs */
1075	while (!LIST_EMPTY(&db->xvblks)) {
1076		xvblk = LIST_FIRST(&db->xvblks);
1077		if (xvblk->map == 0xFF) {
1078			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
1079			if (error != 0)
1080				goto fail;
1081		} else {
1082			LDM_DEBUG(0, "%s: incomplete or corrupt "
1083			    "xVBLK found\n", pp->name);
1084			goto fail;
1085		}
1086		LIST_REMOVE(xvblk, entry);
1087		g_free(xvblk->data);
1088		g_free(xvblk);
1089	}
1090	/* construct all VBLKs relations */
1091	LIST_FOREACH(volume, &db->volumes, entry) {
1092		LIST_FOREACH(vblk, &db->vblks, entry)
1093			if (vblk->type == LDM_VBLK_T_COMPONENT &&
1094			    vblk->u.comp.vol_id == volume->id) {
1095				LIST_INSERT_HEAD(&volume->components,
1096				    &vblk->u.comp, entry);
1097				volume->count++;
1098			}
1099		LIST_FOREACH(comp, &volume->components, entry)
1100			LIST_FOREACH(vblk, &db->vblks, entry)
1101				if (vblk->type == LDM_VBLK_T_PARTITION &&
1102				    vblk->u.part.comp_id == comp->id) {
1103					LIST_INSERT_HEAD(&comp->partitions,
1104					    &vblk->u.part, entry);
1105					comp->count++;
1106				}
1107	}
1108	return (0);
1109fail:
1110	ldm_vmdb_free(db);
1111	g_free(buf);
1112	return (ENXIO);
1113}
1114
/*
 * G_PART "add" method.  Adding entries is not implemented for the LDM
 * scheme; all arguments are ignored.
 *
 * Returns ENOSYS unconditionally.
 */
static int
g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
    struct g_part_parms *gpp)
{
	/* Operation not implemented for this scheme. */
	return (ENOSYS);
}
1122
/*
 * G_PART "bootcode" method.  Installing boot code is not implemented for
 * the LDM scheme; all arguments are ignored.
 *
 * Returns ENOSYS unconditionally.
 */
static int
g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
{
	/* Operation not implemented for this scheme. */
	return (ENOSYS);
}
1129
/*
 * G_PART "create" method.  Creating a new LDM scheme is not implemented;
 * all arguments are ignored.
 *
 * Returns ENOSYS unconditionally.
 */
static int
g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
{
	/* Operation not implemented for this scheme. */
	return (ENOSYS);
}
1136
1137static int
1138g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
1139{
1140	struct g_part_ldm_table *table;
1141	struct g_provider *pp;
1142
1143	table = (struct g_part_ldm_table *)basetable;
1144	/*
1145	 * To destroy LDM on a disk partitioned with GPT we should delete
1146	 * ms-ldm-metadata partition, but we can't do this via standard
1147	 * GEOM_PART method.
1148	 */
1149	if (table->is_gpt)
1150		return (ENOSYS);
1151	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1152	/*
1153	 * To destroy LDM we should wipe MBR, first private header and
1154	 * backup private headers.
1155	 */
1156	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
1157	/*
1158	 * Don't touch last backup private header when LDM database is
1159	 * not located in the last 1MByte area.
1160	 * XXX: can't remove all blocks.
1161	 */
1162	if (table->db_offset + LDM_DB_SIZE ==
1163	    pp->mediasize / pp->sectorsize)
1164		basetable->gpt_smtail = 1;
1165	return (0);
1166}
1167
1168static void
1169g_part_ldm_dumpconf(struct g_part_table *basetable,
1170    struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
1171{
1172	struct g_part_ldm_entry *entry;
1173
1174	entry = (struct g_part_ldm_entry *)baseentry;
1175	if (indent == NULL) {
1176		/* conftxt: libdisk compatibility */
1177		sbuf_printf(sb, " xs LDM xt %u", entry->type);
1178	} else if (entry != NULL) {
1179		/* confxml: partition entry information */
1180		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
1181		    entry->type);
1182	} else {
1183		/* confxml: scheme information */
1184	}
1185}
1186
/*
 * G_PART "dumpto" method.  Both arguments are ignored and 0 is returned
 * for every entry.
 *
 * NOTE(review): presumably 0 tells GEOM_PART that LDM entries are not
 * suitable as kernel dump targets -- confirm against g_part_if.
 */
static int
g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
{
	/* The answer does not depend on the entry. */
	return (0);
}
1193
/*
 * G_PART "modify" method.  Changing existing entries is not implemented
 * for the LDM scheme; all arguments are ignored.
 *
 * Returns ENOSYS unconditionally.
 */
static int
g_part_ldm_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{
	/* Operation not implemented for this scheme. */
	return (ENOSYS);
}
1201
1202static const char *
1203g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
1204    char *buf, size_t bufsz)
1205{
1206
1207	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
1208	return (buf);
1209}
1210
1211static int
1212ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
1213{
1214	struct g_part_ldm_table *table;
1215	struct g_part_table *gpt;
1216	struct g_part_entry *entry;
1217	struct g_consumer *cp2;
1218	struct gpt_ent *part;
1219	u_char *buf;
1220	int error;
1221
1222	/*
1223	 * XXX: We use some knowledge about GEOM_PART_GPT internal
1224	 * structures, but it is easier than parse GPT by himself.
1225	 */
1226	g_topology_lock();
1227	gpt = cp->provider->geom->softc;
1228	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
1229		part = (struct gpt_ent *)(entry + 1);
1230		/* Search ms-ldm-metadata partition */
1231		if (memcmp(&part->ent_type,
1232		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
1233		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
1234			continue;
1235
1236		/* Create new consumer and attach it to metadata partition */
1237		cp2 = g_new_consumer(cp->geom);
1238		error = g_attach(cp2, entry->gpe_pp);
1239		if (error != 0) {
1240			g_destroy_consumer(cp2);
1241			g_topology_unlock();
1242			return (ENXIO);
1243		}
1244		error = g_access(cp2, 1, 0, 0);
1245		if (error != 0) {
1246			g_detach(cp2);
1247			g_destroy_consumer(cp2);
1248			g_topology_unlock();
1249			return (ENXIO);
1250		}
1251		g_topology_unlock();
1252
1253		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
1254		    cp->provider->name, cp2->provider->name);
1255		/* Read the LDM private header */
1256		buf = ldm_privhdr_read(cp2,
1257		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
1258		    &error);
1259		if (buf != NULL) {
1260			table = (struct g_part_ldm_table *)basetable;
1261			table->is_gpt = 1;
1262			g_free(buf);
1263			return (G_PART_PROBE_PRI_HIGH);
1264		}
1265
1266		/* second consumer is no longer needed. */
1267		g_topology_lock();
1268		g_access(cp2, -1, 0, 0);
1269		g_detach(cp2);
1270		g_destroy_consumer(cp2);
1271		break;
1272	}
1273	g_topology_unlock();
1274	return (ENXIO);
1275}
1276
1277static int
1278g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
1279{
1280	struct g_provider *pp;
1281	u_char *buf, type[64];
1282	int error, idx;
1283
1284
1285	pp = cp->provider;
1286	if (pp->sectorsize != 512)
1287		return (ENXIO);
1288
1289	error = g_getattr("PART::scheme", cp, &type);
1290	if (error == 0 && strcmp(type, "GPT") == 0) {
1291		if (g_getattr("PART::type", cp, &type) != 0 ||
1292		    strcmp(type, "ms-ldm-data") != 0)
1293			return (ENXIO);
1294		error = ldm_gpt_probe(basetable, cp);
1295		return (error);
1296	}
1297
1298	if (basetable->gpt_depth != 0)
1299		return (ENXIO);
1300
1301	/* LDM has 1M metadata area */
1302	if (pp->mediasize <= 1024 * 1024)
1303		return (ENOSPC);
1304
1305	/* Check that there's a MBR */
1306	buf = g_read_data(cp, 0, pp->sectorsize, &error);
1307	if (buf == NULL)
1308		return (error);
1309
1310	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
1311		g_free(buf);
1312		return (ENXIO);
1313	}
1314	error = ENXIO;
1315	/* Check that we have LDM partitions in the MBR */
1316	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
1317		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
1318			error = 0;
1319	}
1320	g_free(buf);
1321	if (error == 0) {
1322		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
1323		    pp->name);
1324		/* Read the LDM private header */
1325		buf = ldm_privhdr_read(cp,
1326		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
1327		if (buf == NULL)
1328			return (error);
1329		g_free(buf);
1330		return (G_PART_PROBE_PRI_HIGH);
1331	}
1332	return (error);
1333}
1334
1335static int
1336g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
1337{
1338	struct g_part_ldm_table *table;
1339	struct g_part_ldm_entry *entry;
1340	struct g_consumer *cp2;
1341	struct ldm_component *comp;
1342	struct ldm_partition *part;
1343	struct ldm_volume *vol;
1344	struct ldm_disk *disk;
1345	struct ldm_db db;
1346	int error, index, skipped;
1347
1348	table = (struct g_part_ldm_table *)basetable;
1349	memset(&db, 0, sizeof(db));
1350	cp2 = cp;					/* ms-ldm-data */
1351	if (table->is_gpt)
1352		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
1353	/* Read and parse LDM private headers. */
1354	error = ldm_privhdr_check(&db, cp, table->is_gpt);
1355	if (error != 0)
1356		goto gpt_cleanup;
1357	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
1358	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
1359	table->db_offset = db.ph.db_offset;
1360	/* Make additional checks for GPT */
1361	if (table->is_gpt) {
1362		error = ldm_gpt_check(&db, cp);
1363		if (error != 0)
1364			goto gpt_cleanup;
1365		/*
1366		 * Now we should reset database offset to zero, because our
1367		 * consumer cp is attached to the ms-ldm-metadata partition
1368		 * and we don't need add db_offset to read from it.
1369		 */
1370		db.ph.db_offset = 0;
1371	}
1372	/* Read and parse LDM TOC headers. */
1373	error = ldm_tochdr_check(&db, cp);
1374	if (error != 0)
1375		goto gpt_cleanup;
1376	/* Read and parse LDM VMDB header. */
1377	error = ldm_vmdbhdr_check(&db, cp);
1378	if (error != 0)
1379		goto gpt_cleanup;
1380	error = ldm_vmdb_parse(&db, cp);
1381	/*
1382	 * For the GPT case we must detach and destroy
1383	 * second consumer before return.
1384	 */
1385gpt_cleanup:
1386	if (table->is_gpt) {
1387		g_topology_lock();
1388		g_access(cp, -1, 0, 0);
1389		g_detach(cp);
1390		g_destroy_consumer(cp);
1391		g_topology_unlock();
1392		cp = cp2;
1393	}
1394	if (error != 0)
1395		return (error);
1396	/* Search current disk in the disk list. */
1397	LIST_FOREACH(disk, &db.disks, entry)
1398	    if (memcmp(&disk->guid, &db.ph.disk_guid,
1399		sizeof(struct uuid)) == 0)
1400		    break;
1401	if (disk == NULL) {
1402		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
1403		    cp->provider->name);
1404		ldm_vmdb_free(&db);
1405		return (ENXIO);
1406	}
1407	index = 1;
1408	LIST_FOREACH(vol, &db.volumes, entry) {
1409		LIST_FOREACH(comp, &vol->components, entry) {
1410			/* Skip volumes from different disks. */
1411			part = LIST_FIRST(&comp->partitions);
1412			if (part->disk_id != disk->id)
1413				continue;
1414			skipped = 0;
1415			/* We don't support spanned and striped volumes. */
1416			if (comp->count > 1 || part->offset != 0) {
1417				LDM_DEBUG(1, "%s: LDM volume component "
1418				    "%ju has %u partitions. Skipped",
1419				    cp->provider->name, (uintmax_t)comp->id,
1420				    comp->count);
1421				skipped = 1;
1422			}
1423			/*
1424			 * Allow mirrored volumes only when they are explicitly
1425			 * allowed with kern.geom.part.ldm.show_mirrors=1.
1426			 */
1427			if (vol->count > 1 && show_mirrors == 0) {
1428				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
1429				    "components. Skipped",
1430				    cp->provider->name, (uintmax_t)vol->id,
1431				    vol->count);
1432				skipped = 1;
1433			}
1434			entry = (struct g_part_ldm_entry *)g_part_new_entry(
1435			    basetable, index++,
1436			    basetable->gpt_first + part->start,
1437			    basetable->gpt_first + part->start +
1438			    part->size - 1);
1439			/*
1440			 * Mark skipped partition as ms-ldm-data partition.
1441			 * We do not support them, but it is better to show
1442			 * that we have something there, than just show
1443			 * free space.
1444			 */
1445			if (skipped == 0)
1446				entry->type = vol->part_type;
1447			else
1448				entry->type = DOSPTYP_LDM;
1449			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
1450			    " end: %ju, type: 0x%02x\n", cp->provider->name,
1451			    (uintmax_t)part->id,(uintmax_t)part->start +
1452			    basetable->gpt_first, (uintmax_t)part->start +
1453			    part->size + basetable->gpt_first - 1,
1454			    vol->part_type);
1455		}
1456	}
1457	ldm_vmdb_free(&db);
1458	return (error);
1459}
1460
1461static const char *
1462g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1463    char *buf, size_t bufsz)
1464{
1465	struct g_part_ldm_entry *entry;
1466	int i;
1467
1468	entry = (struct g_part_ldm_entry *)baseentry;
1469	for (i = 0; i < nitems(ldm_alias_match); i++) {
1470		if (ldm_alias_match[i].typ == entry->type)
1471			return (g_part_alias_name(ldm_alias_match[i].alias));
1472	}
1473	snprintf(buf, bufsz, "!%d", entry->type);
1474	return (buf);
1475}
1476
/*
 * G_PART "write" method.  Writing LDM metadata back to disk is not
 * implemented; both arguments are ignored.
 *
 * Returns ENOSYS unconditionally.
 */
static int
g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
{
	/* Operation not implemented for this scheme. */
	return (ENOSYS);
}
1483