1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2008 Andrew Thompson <thompsa@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/ctype.h>
33#include <sys/param.h>
34#include <sys/bio.h>
35#include <sys/kernel.h>
36#include <sys/limits.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/sysctl.h>
40#include <sys/systm.h>
41
42#include <geom/geom.h>
43#include <geom/geom_dbg.h>
44#include <sys/endian.h>
45
46#include <geom/linux_lvm/g_linux_lvm.h>
47
/* Advertise this class through the kernel feature list. */
FEATURE(geom_linux_lvm, "GEOM Linux LVM partitioning support");

/* Declare malloc(9) label used for every vg/pv/lv/segment allocation below */
static MALLOC_DEFINE(M_GLLVM, "gllvm", "GEOM_LINUX_LVM Data");

/* GEOM class methods */
static g_access_t g_llvm_access;
static g_init_t g_llvm_init;
static g_orphan_t g_llvm_orphan;
static g_orphan_t g_llvm_taste_orphan;
static g_start_t g_llvm_start;
static g_taste_t g_llvm_taste;
static g_ctl_destroy_geom_t g_llvm_destroy_geom;

/* Volume group management and on-disk label/metadata readers */
static void	g_llvm_done(struct bio *);
static void	g_llvm_remove_disk(struct g_llvm_vg *, struct g_consumer *);
static int	g_llvm_activate_lv(struct g_llvm_vg *, struct g_llvm_lv *);
static int	g_llvm_add_disk(struct g_llvm_vg *, struct g_provider *, char *);
static void	g_llvm_free_vg(struct g_llvm_vg *);
static int	g_llvm_destroy(struct g_llvm_vg *, int);
static int	g_llvm_read_label(struct g_consumer *, struct g_llvm_label *);
static int	g_llvm_read_md(struct g_consumer *, struct g_llvm_metadata *,
		    struct g_llvm_label *);

/* Decoders for the binary label/metadata headers and the text config */
static int	llvm_label_decode(const u_char *, struct g_llvm_label *, int);
static int	llvm_md_decode(const u_char *, struct g_llvm_metadata *,
		    struct g_llvm_label *);
static int	llvm_textconf_decode(u_char *, int,
		    struct g_llvm_metadata *);
static int	llvm_textconf_decode_pv(char **, char *, struct g_llvm_vg *);
static int	llvm_textconf_decode_lv(char **, char *, struct g_llvm_vg *);
static int	llvm_textconf_decode_sg(char **, char *, struct g_llvm_lv *);

/* kern.geom.linux_lvm.debug: verbosity threshold, read/write and tunable */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, linux_lvm, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_LINUX_LVM stuff");
static u_int g_llvm_debug = 0;
SYSCTL_UINT(_kern_geom_linux_lvm, OID_AUTO, debug, CTLFLAG_RWTUN, &g_llvm_debug, 0,
    "Debug level");

/* All volume groups decoded so far; initialised in g_llvm_init() */
LIST_HEAD(, g_llvm_vg) vg_list;
89
90/*
91 * Called to notify geom when it's been opened, and for what intent
92 */
93static int
94g_llvm_access(struct g_provider *pp, int dr, int dw, int de)
95{
96	struct g_consumer *c;
97	struct g_llvm_vg *vg;
98	struct g_geom *gp;
99	int error;
100
101	KASSERT(pp != NULL, ("%s: NULL provider", __func__));
102	gp = pp->geom;
103	KASSERT(gp != NULL, ("%s: NULL geom", __func__));
104	vg = gp->softc;
105
106	if (vg == NULL) {
107		/* It seems that .access can be called with negative dr,dw,dx
108		 * in this case but I want to check for myself */
109		G_LLVM_DEBUG(0, "access(%d, %d, %d) for %s",
110		    dr, dw, de, pp->name);
111
112		/* This should only happen when geom is withered so
113		 * allow only negative requests */
114		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
115		    ("%s: Positive access for %s", __func__, pp->name));
116		if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
117			G_LLVM_DEBUG(0,
118			    "Device %s definitely destroyed", pp->name);
119		return (0);
120	}
121
122	/* Grab an exclusive bit to propagate on our consumers on first open */
123	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
124		de++;
125	/* ... drop it on close */
126	if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
127		de--;
128
129	error = ENXIO;
130	LIST_FOREACH(c, &gp->consumer, consumer) {
131		KASSERT(c != NULL, ("%s: consumer is NULL", __func__));
132		error = g_access(c, dr, dw, de);
133		if (error != 0) {
134			struct g_consumer *c2;
135
136			/* Backout earlier changes */
137			LIST_FOREACH(c2, &gp->consumer, consumer) {
138				if (c2 == c) /* all eariler components fixed */
139					return (error);
140				g_access(c2, -dr, -dw, -de);
141			}
142		}
143	}
144
145	return (error);
146}
147
148/*
149 * Dismantle bio_queue and destroy its components
150 */
151static void
152bioq_dismantle(struct bio_queue_head *bq)
153{
154	struct bio *b;
155
156	for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) {
157		bioq_remove(bq, b);
158		g_destroy_bio(b);
159	}
160}
161
162/*
163 * GEOM .done handler
164 * Can't use standard handler because one requested IO may
165 * fork into additional data IOs
166 */
167static void
168g_llvm_done(struct bio *b)
169{
170	struct bio *parent_b;
171
172	parent_b = b->bio_parent;
173
174	if (b->bio_error != 0) {
175		G_LLVM_DEBUG(0, "Error %d for offset=%ju, length=%ju on %s",
176		    b->bio_error, b->bio_offset, b->bio_length,
177		    b->bio_to->name);
178		if (parent_b->bio_error == 0)
179			parent_b->bio_error = b->bio_error;
180	}
181
182	parent_b->bio_inbed++;
183	parent_b->bio_completed += b->bio_completed;
184
185	if (parent_b->bio_children == parent_b->bio_inbed) {
186		parent_b->bio_completed = parent_b->bio_length;
187		g_io_deliver(parent_b, parent_b->bio_error);
188	}
189	g_destroy_bio(b);
190}
191
192static void
193g_llvm_start(struct bio *bp)
194{
195	struct g_provider *pp;
196	struct g_llvm_vg *vg;
197	struct g_llvm_pv *pv;
198	struct g_llvm_lv *lv;
199	struct g_llvm_segment *sg;
200	struct bio *cb;
201	struct bio_queue_head bq;
202	size_t chunk_size;
203	off_t offset, length;
204	char *addr;
205	u_int count;
206
207	pp = bp->bio_to;
208	lv = pp->private;
209	vg = pp->geom->softc;
210
211	switch (bp->bio_cmd) {
212	case BIO_READ:
213	case BIO_WRITE:
214	case BIO_DELETE:
215	/* XXX BIO_GETATTR allowed? */
216		break;
217	default:
218		/*
219		 * BIO_SPEEDUP and BIO_FLUSH should pass through to all sg
220		 * elements, but aren't.
221		 */
222		g_io_deliver(bp, EOPNOTSUPP);
223		return;
224	}
225
226	bioq_init(&bq);
227
228	chunk_size = vg->vg_extentsize;
229	addr = bp->bio_data;
230	offset = bp->bio_offset;	/* virtual offset and length */
231	length = bp->bio_length;
232
233	while (length > 0) {
234		size_t chunk_index, in_chunk_offset, in_chunk_length;
235
236		pv = NULL;
237		cb = g_clone_bio(bp);
238		if (cb == NULL) {
239			bioq_dismantle(&bq);
240			if (bp->bio_error == 0)
241				bp->bio_error = ENOMEM;
242			g_io_deliver(bp, bp->bio_error);
243			return;
244		}
245
246		/* get the segment and the pv */
247		if (lv->lv_sgcount == 1) {
248			/* skip much of the calculations for a single sg */
249			chunk_index = 0;
250			in_chunk_offset = 0;
251			in_chunk_length = length;
252			sg = lv->lv_firstsg;
253			pv = sg->sg_pv;
254			cb->bio_offset = offset + sg->sg_pvoffset;
255		} else {
256			chunk_index = offset / chunk_size; /* round downwards */
257			in_chunk_offset = offset % chunk_size;
258			in_chunk_length =
259			    min(length, chunk_size - in_chunk_offset);
260
261			/* XXX could be faster */
262			LIST_FOREACH(sg, &lv->lv_segs, sg_next) {
263				if (chunk_index >= sg->sg_start &&
264				    chunk_index <= sg->sg_end) {
265					/* adjust chunk index for sg start */
266					chunk_index -= sg->sg_start;
267					pv = sg->sg_pv;
268					break;
269				}
270			}
271			cb->bio_offset =
272			    (off_t)chunk_index * (off_t)chunk_size
273			    + in_chunk_offset + sg->sg_pvoffset;
274		}
275
276		KASSERT(pv != NULL, ("Can't find PV for chunk %zu",
277		    chunk_index));
278
279		cb->bio_to = pv->pv_gprov;
280		cb->bio_done = g_llvm_done;
281		cb->bio_length = in_chunk_length;
282		cb->bio_data = addr;
283		cb->bio_caller1 = pv;
284		bioq_disksort(&bq, cb);
285
286		G_LLVM_DEBUG(5,
287		    "Mapped %s(%ju, %ju) on %s to %zu(%zu,%zu) @ %s:%ju",
288		    bp->bio_cmd == BIO_READ ? "R" : "W",
289		    offset, length, lv->lv_name,
290		    chunk_index, in_chunk_offset, in_chunk_length,
291		    pv->pv_name, cb->bio_offset);
292
293		addr += in_chunk_length;
294		length -= in_chunk_length;
295		offset += in_chunk_length;
296	}
297
298	/* Fire off bio's here */
299	count = 0;
300	for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) {
301		bioq_remove(&bq, cb);
302		pv = cb->bio_caller1;
303		cb->bio_caller1 = NULL;
304		G_LLVM_DEBUG(6, "firing bio to %s, offset=%ju, length=%ju",
305		    cb->bio_to->name, cb->bio_offset, cb->bio_length);
306		g_io_request(cb, pv->pv_gcons);
307		count++;
308	}
309	if (count == 0) { /* We handled everything locally */
310		bp->bio_completed = bp->bio_length;
311		g_io_deliver(bp, 0);
312	}
313}
314
315static void
316g_llvm_remove_disk(struct g_llvm_vg *vg, struct g_consumer *cp)
317{
318	struct g_llvm_pv *pv;
319	struct g_llvm_lv *lv;
320	struct g_llvm_segment *sg;
321	int found;
322
323	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
324	pv = (struct g_llvm_pv *)cp->private;
325
326	G_LLVM_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
327	    pv->pv_name);
328
329	LIST_FOREACH(lv, &vg->vg_lvs, lv_next) {
330		/* Find segments that map to this disk */
331		found = 0;
332		LIST_FOREACH(sg, &lv->lv_segs, sg_next) {
333			if (sg->sg_pv == pv) {
334				sg->sg_pv = NULL;
335				lv->lv_sgactive--;
336				found = 1;
337				break;
338			}
339		}
340		if (found) {
341			G_LLVM_DEBUG(0, "Device %s removed.",
342			    lv->lv_gprov->name);
343			g_wither_provider(lv->lv_gprov, ENXIO);
344			lv->lv_gprov = NULL;
345		}
346	}
347
348	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
349		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
350	g_detach(cp);
351	g_destroy_consumer(cp);
352}
353
354static void
355g_llvm_orphan(struct g_consumer *cp)
356{
357	struct g_llvm_vg *vg;
358	struct g_geom *gp;
359
360	g_topology_assert();
361	gp = cp->geom;
362	vg = gp->softc;
363	if (vg == NULL)
364		return;
365
366	g_llvm_remove_disk(vg, cp);
367	g_llvm_destroy(vg, 1);
368}
369
370static int
371g_llvm_activate_lv(struct g_llvm_vg *vg, struct g_llvm_lv *lv)
372{
373	struct g_geom *gp;
374	struct g_provider *pp;
375
376	g_topology_assert();
377
378	KASSERT(lv->lv_sgactive == lv->lv_sgcount, ("segment missing"));
379
380	gp = vg->vg_geom;
381	pp = g_new_providerf(gp, "linux_lvm/%s-%s", vg->vg_name, lv->lv_name);
382	pp->mediasize = vg->vg_extentsize * (off_t)lv->lv_extentcount;
383	pp->sectorsize = vg->vg_sectorsize;
384	g_error_provider(pp, 0);
385	lv->lv_gprov = pp;
386	pp->private = lv;
387
388	G_LLVM_DEBUG(1, "Created %s, %juM", pp->name,
389	    pp->mediasize / (1024*1024));
390
391	return (0);
392}
393
394static int
395g_llvm_add_disk(struct g_llvm_vg *vg, struct g_provider *pp, char *uuid)
396{
397	struct g_geom *gp;
398	struct g_consumer *cp, *fcp;
399	struct g_llvm_pv *pv;
400	struct g_llvm_lv *lv;
401	struct g_llvm_segment *sg;
402	int error;
403
404	g_topology_assert();
405
406	LIST_FOREACH(pv, &vg->vg_pvs, pv_next) {
407		if (strcmp(pv->pv_uuid, uuid) == 0)
408			break;	/* found it */
409	}
410	if (pv == NULL) {
411		G_LLVM_DEBUG(3, "uuid %s not found in pv list", uuid);
412		return (ENOENT);
413	}
414	if (pv->pv_gprov != NULL) {
415		G_LLVM_DEBUG(0, "disk %s already initialised in %s",
416		    pv->pv_name, vg->vg_name);
417		return (EEXIST);
418	}
419
420	pv->pv_start *= vg->vg_sectorsize;
421	gp = vg->vg_geom;
422	fcp = LIST_FIRST(&gp->consumer);
423
424	cp = g_new_consumer(gp);
425	error = g_attach(cp, pp);
426	G_LLVM_DEBUG(1, "Attached %s to %s at offset %ju",
427	    pp->name, pv->pv_name, pv->pv_start);
428
429	if (error != 0) {
430		G_LLVM_DEBUG(0, "cannot attach %s to %s",
431		    pp->name, vg->vg_name);
432		g_destroy_consumer(cp);
433		return (error);
434	}
435
436	if (fcp != NULL) {
437		if (fcp->provider->sectorsize != pp->sectorsize) {
438			G_LLVM_DEBUG(0, "Provider %s of %s has invalid "
439			    "sector size (%d)", pp->name, vg->vg_name,
440			    pp->sectorsize);
441			return (EINVAL);
442		}
443		if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) {
444			/* Replicate access permissions from first "live"
445			 * consumer to the new one */
446			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
447			if (error != 0) {
448				g_detach(cp);
449				g_destroy_consumer(cp);
450				return (error);
451			}
452		}
453	}
454
455	cp->private = pv;
456	pv->pv_gcons = cp;
457	pv->pv_gprov = pp;
458
459	LIST_FOREACH(lv, &vg->vg_lvs, lv_next) {
460		/* Find segments that map to this disk */
461		LIST_FOREACH(sg, &lv->lv_segs, sg_next) {
462			if (strcmp(sg->sg_pvname, pv->pv_name) == 0) {
463				/* avtivate the segment */
464				KASSERT(sg->sg_pv == NULL,
465				    ("segment already mapped"));
466				sg->sg_pvoffset =
467				    (off_t)sg->sg_pvstart * vg->vg_extentsize
468				    + pv->pv_start;
469				sg->sg_pv = pv;
470				lv->lv_sgactive++;
471
472				G_LLVM_DEBUG(2, "%s: %d to %d @ %s:%d"
473				    " offset %ju sector %ju",
474				    lv->lv_name, sg->sg_start, sg->sg_end,
475				    sg->sg_pvname, sg->sg_pvstart,
476				    sg->sg_pvoffset,
477				    sg->sg_pvoffset / vg->vg_sectorsize);
478			}
479		}
480		/* Activate any lvs waiting on this disk */
481		if (lv->lv_gprov == NULL && lv->lv_sgactive == lv->lv_sgcount) {
482			error = g_llvm_activate_lv(vg, lv);
483			if (error)
484				break;
485		}
486	}
487	return (error);
488}
489
/*
 * GEOM class .init method: start with an empty list of volume groups.
 */
static void
g_llvm_init(struct g_class *mp)
{
	LIST_INIT(&vg_list);
}
495
496static void
497g_llvm_free_vg(struct g_llvm_vg *vg)
498{
499	struct g_llvm_pv *pv;
500	struct g_llvm_lv *lv;
501	struct g_llvm_segment *sg;
502
503	/* Free all the structures */
504	while ((pv = LIST_FIRST(&vg->vg_pvs)) != NULL) {
505		LIST_REMOVE(pv, pv_next);
506		free(pv, M_GLLVM);
507	}
508	while ((lv = LIST_FIRST(&vg->vg_lvs)) != NULL) {
509		while ((sg = LIST_FIRST(&lv->lv_segs)) != NULL) {
510			LIST_REMOVE(sg, sg_next);
511			free(sg, M_GLLVM);
512		}
513		LIST_REMOVE(lv, lv_next);
514		free(lv, M_GLLVM);
515	}
516	LIST_REMOVE(vg, vg_next);
517	free(vg, M_GLLVM);
518}
519
/*
 * Orphan method installed on the temporary geom used while tasting.
 * It is not expected to run, so the assertion below always fails when
 * it does.
 */
static void
g_llvm_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}
527
528static struct g_geom *
529g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
530{
531	struct g_consumer *cp;
532	struct g_geom *gp;
533	struct g_llvm_label ll;
534	struct g_llvm_metadata md;
535	struct g_llvm_vg *vg;
536	int error;
537
538	bzero(&md, sizeof(md));
539
540	g_topology_assert();
541	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
542	gp = g_new_geomf(mp, "linux_lvm:taste");
543	/* This orphan function should be never called. */
544	gp->orphan = g_llvm_taste_orphan;
545	cp = g_new_consumer(gp);
546	error = g_attach(cp, pp);
547	if (error == 0) {
548		error = g_llvm_read_label(cp, &ll);
549		if (error == 0)
550			error = g_llvm_read_md(cp, &md, &ll);
551		g_detach(cp);
552	}
553	g_destroy_consumer(cp);
554	g_destroy_geom(gp);
555	if (error != 0)
556		return (NULL);
557
558	vg = md.md_vg;
559	if (vg->vg_geom == NULL) {
560		/* new volume group */
561		gp = g_new_geomf(mp, "%s", vg->vg_name);
562		gp->start = g_llvm_start;
563		gp->spoiled = g_llvm_orphan;
564		gp->orphan = g_llvm_orphan;
565		gp->access = g_llvm_access;
566		vg->vg_sectorsize = pp->sectorsize;
567		vg->vg_extentsize *= vg->vg_sectorsize;
568		vg->vg_geom = gp;
569		gp->softc = vg;
570		G_LLVM_DEBUG(1, "Created volume %s, extent size %zuK",
571		    vg->vg_name, vg->vg_extentsize / 1024);
572	}
573
574	/* initialise this disk in the volume group */
575	g_llvm_add_disk(vg, pp, ll.ll_uuid);
576	return (vg->vg_geom);
577}
578
579static int
580g_llvm_destroy(struct g_llvm_vg *vg, int force)
581{
582	struct g_provider *pp;
583	struct g_geom *gp;
584
585	g_topology_assert();
586	if (vg == NULL)
587		return (ENXIO);
588	gp = vg->vg_geom;
589
590	LIST_FOREACH(pp, &gp->provider, provider) {
591		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
592			G_LLVM_DEBUG(1, "Device %s is still open (r%dw%de%d)",
593			    pp->name, pp->acr, pp->acw, pp->ace);
594			if (!force)
595				return (EBUSY);
596		}
597	}
598
599	g_llvm_free_vg(gp->softc);
600	gp->softc = NULL;
601	g_wither_geom(gp, ENXIO);
602	return (0);
603}
604
605static int
606g_llvm_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
607    struct g_geom *gp)
608{
609	struct g_llvm_vg *vg;
610
611	vg = gp->softc;
612	return (g_llvm_destroy(vg, 0));
613}
614
615int
616g_llvm_read_label(struct g_consumer *cp, struct g_llvm_label *ll)
617{
618	struct g_provider *pp;
619	u_char *buf;
620	int i, error = 0;
621
622	g_topology_assert();
623
624	/* The LVM label is stored on the first four sectors */
625	error = g_access(cp, 1, 0, 0);
626	if (error != 0)
627		return (error);
628	pp = cp->provider;
629	g_topology_unlock();
630	buf = g_read_data(cp, 0, pp->sectorsize * 4, &error);
631	g_topology_lock();
632	g_access(cp, -1, 0, 0);
633	if (buf == NULL) {
634		G_LLVM_DEBUG(1, "Cannot read metadata from %s (error=%d)",
635		    pp->name, error);
636		return (error);
637	}
638
639	/* Search the four sectors for the LVM label. */
640	for (i = 0; i < 4; i++) {
641		error = llvm_label_decode(&buf[i * pp->sectorsize], ll, i);
642		if (error == 0)
643			break;	/* found it */
644	}
645	g_free(buf);
646	return (error);
647}
648
649int
650g_llvm_read_md(struct g_consumer *cp, struct g_llvm_metadata *md,
651    struct g_llvm_label *ll)
652{
653	struct g_provider *pp;
654	u_char *buf;
655	int error;
656	int size;
657
658	g_topology_assert();
659
660	error = g_access(cp, 1, 0, 0);
661	if (error != 0)
662		return (error);
663	pp = cp->provider;
664	g_topology_unlock();
665	buf = g_read_data(cp, ll->ll_md_offset, pp->sectorsize, &error);
666	g_topology_lock();
667	g_access(cp, -1, 0, 0);
668	if (buf == NULL) {
669		G_LLVM_DEBUG(0, "Cannot read metadata from %s (error=%d)",
670		    cp->provider->name, error);
671		return (error);
672	}
673
674	error = llvm_md_decode(buf, md, ll);
675	g_free(buf);
676	if (error != 0) {
677		return (error);
678	}
679
680	G_LLVM_DEBUG(1, "reading LVM2 config @ %s:%ju", pp->name,
681		    ll->ll_md_offset + md->md_reloffset);
682	error = g_access(cp, 1, 0, 0);
683	if (error != 0)
684		return (error);
685	pp = cp->provider;
686	g_topology_unlock();
687	/* round up to the nearest sector */
688	size = md->md_relsize +
689	    (pp->sectorsize - md->md_relsize % pp->sectorsize);
690	buf = g_read_data(cp, ll->ll_md_offset + md->md_reloffset, size, &error);
691	g_topology_lock();
692	g_access(cp, -1, 0, 0);
693	if (buf == NULL) {
694		G_LLVM_DEBUG(0, "Cannot read LVM2 config from %s (error=%d)",
695		    pp->name, error);
696		return (error);
697	}
698	buf[md->md_relsize] = '\0';
699	G_LLVM_DEBUG(10, "LVM config:\n%s\n", buf);
700	error = llvm_textconf_decode(buf, md->md_relsize, md);
701	g_free(buf);
702
703	return (error);
704}
705
706static int
707llvm_label_decode(const u_char *data, struct g_llvm_label *ll, int sector)
708{
709	uint64_t off;
710	char *uuid;
711
712	/* Magic string */
713	if (bcmp("LABELONE", data , 8) != 0)
714		return (EINVAL);
715
716	/* We only support LVM2 text format */
717	if (bcmp("LVM2 001", data + 24, 8) != 0) {
718		G_LLVM_DEBUG(0, "Unsupported LVM format");
719		return (EINVAL);
720	}
721
722	ll->ll_sector = le64dec(data + 8);
723	ll->ll_crc = le32dec(data + 16);
724	ll->ll_offset = le32dec(data + 20);
725
726	if (ll->ll_sector != sector) {
727		G_LLVM_DEBUG(0, "Expected sector %ju, found at %d",
728		    ll->ll_sector, sector);
729		return (EINVAL);
730	}
731
732	off = ll->ll_offset;
733	/*
734	 * convert the binary uuid to string format, the format is
735	 * xxxxxx-xxxx-xxxx-xxxx-xxxx-xxxx-xxxxxx (6-4-4-4-4-4-6)
736	 */
737	uuid = ll->ll_uuid;
738	bcopy(data + off, uuid, 6);
739	off += 6;
740	uuid += 6;
741	*uuid++ = '-';
742	for (int i = 0; i < 5; i++) {
743		bcopy(data + off, uuid, 4);
744		off += 4;
745		uuid += 4;
746		*uuid++ = '-';
747	}
748	bcopy(data + off, uuid, 6);
749	off += 6;
750	uuid += 6;
751	*uuid++ = '\0';
752
753	ll->ll_size = le64dec(data + off);
754	off += 8;
755	ll->ll_pestart = le64dec(data + off);
756	off += 16;
757
758	/* Only one data section is supported */
759	if (le64dec(data + off) != 0) {
760		G_LLVM_DEBUG(0, "Only one data section supported");
761		return (EINVAL);
762	}
763
764	off += 16;
765	ll->ll_md_offset = le64dec(data + off);
766	off += 8;
767	ll->ll_md_size = le64dec(data + off);
768	off += 8;
769
770	G_LLVM_DEBUG(1, "LVM metadata: offset=%ju, size=%ju", ll->ll_md_offset,
771	    ll->ll_md_size);
772
773	/* Only one data section is supported */
774	if (le64dec(data + off) != 0) {
775		G_LLVM_DEBUG(0, "Only one metadata section supported");
776		return (EINVAL);
777	}
778
779	G_LLVM_DEBUG(2, "label uuid=%s", ll->ll_uuid);
780	G_LLVM_DEBUG(2, "sector=%ju, crc=%u, offset=%u, size=%ju, pestart=%ju",
781	    ll->ll_sector, ll->ll_crc, ll->ll_offset, ll->ll_size,
782	    ll->ll_pestart);
783
784	return (0);
785}
786
787static int
788llvm_md_decode(const u_char *data, struct g_llvm_metadata *md,
789    struct g_llvm_label *ll)
790{
791	uint64_t off;
792	char magic[16];
793
794	off = 0;
795	md->md_csum = le32dec(data + off);
796	off += 4;
797	bcopy(data + off, magic, 16);
798	off += 16;
799	md->md_version = le32dec(data + off);
800	off += 4;
801	md->md_start = le64dec(data + off);
802	off += 8;
803	md->md_size = le64dec(data + off);
804	off += 8;
805
806	if (bcmp(G_LLVM_MAGIC, magic, 16) != 0) {
807		G_LLVM_DEBUG(0, "Incorrect md magic number");
808		return (EINVAL);
809	}
810	if (md->md_version != 1) {
811		G_LLVM_DEBUG(0, "Incorrect md version number (%u)",
812		    md->md_version);
813		return (EINVAL);
814	}
815	if (md->md_start != ll->ll_md_offset) {
816		G_LLVM_DEBUG(0, "Incorrect md offset (%ju)", md->md_start);
817		return (EINVAL);
818	}
819
820	/* Aparently only one is ever returned */
821	md->md_reloffset = le64dec(data + off);
822	off += 8;
823	md->md_relsize = le64dec(data + off);
824	off += 16;	/* XXX skipped checksum */
825
826	if (le64dec(data + off) != 0) {
827		G_LLVM_DEBUG(0, "Only one reloc supported");
828		return (EINVAL);
829	}
830
831	G_LLVM_DEBUG(3, "reloc: offset=%ju, size=%ju",
832	    md->md_reloffset, md->md_relsize);
833	G_LLVM_DEBUG(3, "md: version=%u, start=%ju, size=%ju",
834	    md->md_version, md->md_start, md->md_size);
835
836	return (0);
837}
838
/*
 * Helpers for the "key = value" lines of the LVM text configuration.
 *
 * GRAB_INT and GRAB_STR must be used inside a loop: on a key match they
 * store the value and `continue` to the next line.  GRAB_INT additionally
 * requires a `bad` label in the enclosing function, which it jumps to
 * when the value is not a number.  Because of the `continue` they expand
 * to a plain `if` block on purpose; a do { } while (0) wrapper would
 * capture it.
 */

/* If tok1 equals key, parse tok2 as a base-10 integer into v. */
#define	GRAB_INT(key, tok1, tok2, v)					\
	if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) {	\
		v = strtol(tok2, &tok1, 10);				\
		if (tok1 == tok2)					\
			/* strtol did not eat any of the buffer */	\
			goto bad;					\
		continue;						\
	}

/*
 * If tok1 equals key, copy the double-quoted string found in tok2 into
 * the len-byte buffer v.  A value without both quotes is skipped.  The
 * copy is truncated to fit and always NUL-terminated, since the result
 * is later handed to strcmp().
 */
#define	GRAB_STR(key, tok1, tok2, v, len)				\
	if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) {	\
		strsep(&tok2, "\"");					\
		if (tok2 == NULL)					\
			continue;					\
		tok1 = strsep(&tok2, "\"");				\
		if (tok2 == NULL)					\
			continue;					\
		strlcpy(v, tok1, len);					\
		continue;						\
	}

/*
 * Split value at the first character from str: key receives the text in
 * front of it, cut at its first whitespace character, and value is left
 * pointing behind the separator (NULL when there was none).
 */
#define	SPLIT(key, value, str)						\
	key = strsep(&value, str);					\
	/* cut the key at its first whitespace character */		\
	for (char *t = key; *t != '\0'; t++)				\
		if (isspace(*t)) {					\
			*t = '\0';					\
			break;						\
		}
868
869static size_t
870llvm_grab_name(char *name, const char *tok)
871{
872	size_t len;
873
874	len = 0;
875	if (tok == NULL)
876		return (0);
877	if (tok[0] == '-')
878		return (0);
879	if (strcmp(tok, ".") == 0 || strcmp(tok, "..") == 0)
880		return (0);
881	while (tok[len] && (isalpha(tok[len]) || isdigit(tok[len]) ||
882	    tok[len] == '.' || tok[len] == '_' || tok[len] == '-' ||
883	    tok[len] == '+') && len < G_LLVM_NAMELEN - 1)
884		len++;
885	bcopy(tok, name, len);
886	name[len] = '\0';
887	return (len);
888}
889
890static int
891llvm_textconf_decode(u_char *data, int buflen, struct g_llvm_metadata *md)
892{
893	struct g_llvm_vg	*vg;
894	char *buf = data;
895	char *tok, *v;
896	char name[G_LLVM_NAMELEN];
897	char uuid[G_LLVM_UUIDLEN];
898	size_t len;
899
900	if (buf == NULL || *buf == '\0')
901		return (EINVAL);
902
903	tok = strsep(&buf, "\n");
904	if (tok == NULL)
905		return (EINVAL);
906	len = llvm_grab_name(name, tok);
907	if (len == 0)
908		return (EINVAL);
909
910	/* check too see if the vg has already been loaded off another disk */
911	LIST_FOREACH(vg, &vg_list, vg_next) {
912		if (strcmp(vg->vg_name, name) == 0) {
913			uuid[0] = '\0';
914			/* grab the volume group uuid */
915			while ((tok = strsep(&buf, "\n")) != NULL) {
916				if (strstr(tok, "{"))
917					break;
918				if (strstr(tok, "=")) {
919					SPLIT(v, tok, "=");
920					GRAB_STR("id", v, tok, uuid,
921					    sizeof(uuid));
922				}
923			}
924			if (strcmp(vg->vg_uuid, uuid) == 0) {
925				/* existing vg */
926				md->md_vg = vg;
927				return (0);
928			}
929			/* XXX different volume group with name clash! */
930			G_LLVM_DEBUG(0,
931			    "%s already exists, volume group not loaded", name);
932			return (EINVAL);
933		}
934	}
935
936	vg = malloc(sizeof(*vg), M_GLLVM, M_NOWAIT|M_ZERO);
937	if (vg == NULL)
938		return (ENOMEM);
939
940	strncpy(vg->vg_name, name, sizeof(vg->vg_name));
941	LIST_INIT(&vg->vg_pvs);
942	LIST_INIT(&vg->vg_lvs);
943
944#define	VOL_FOREACH(func, tok, buf, p)					\
945	while ((tok = strsep(buf, "\n")) != NULL) {			\
946		if (strstr(tok, "{")) {					\
947			func(buf, tok, p);				\
948			continue;					\
949		}							\
950		if (strstr(tok, "}"))					\
951			break;						\
952	}
953
954	while ((tok = strsep(&buf, "\n")) != NULL) {
955		if (strcmp(tok, "physical_volumes {") == 0) {
956			VOL_FOREACH(llvm_textconf_decode_pv, tok, &buf, vg);
957			continue;
958		}
959		if (strcmp(tok, "logical_volumes {") == 0) {
960			VOL_FOREACH(llvm_textconf_decode_lv, tok, &buf, vg);
961			continue;
962		}
963		if (strstr(tok, "{")) {
964			G_LLVM_DEBUG(2, "unknown section %s", tok);
965			continue;
966		}
967
968		/* parse 'key = value' lines */
969		if (strstr(tok, "=")) {
970			SPLIT(v, tok, "=");
971			GRAB_STR("id", v, tok, vg->vg_uuid, sizeof(vg->vg_uuid));
972			GRAB_INT("extent_size", v, tok, vg->vg_extentsize);
973			continue;
974		}
975	}
976	/* basic checking */
977	if (vg->vg_extentsize == 0)
978		goto bad;
979
980	md->md_vg = vg;
981	LIST_INSERT_HEAD(&vg_list, vg, vg_next);
982	G_LLVM_DEBUG(3, "vg: name=%s uuid=%s", vg->vg_name, vg->vg_uuid);
983	return(0);
984
985bad:
986	g_llvm_free_vg(vg);
987	return (-1);
988}
989#undef	VOL_FOREACH
990
991static int
992llvm_textconf_decode_pv(char **buf, char *tok, struct g_llvm_vg *vg)
993{
994	struct g_llvm_pv	*pv;
995	char *v;
996	size_t len;
997
998	if (*buf == NULL || **buf == '\0')
999		return (EINVAL);
1000
1001	pv = malloc(sizeof(*pv), M_GLLVM, M_NOWAIT|M_ZERO);
1002	if (pv == NULL)
1003		return (ENOMEM);
1004
1005	pv->pv_vg = vg;
1006	len = 0;
1007	if (tok == NULL)
1008		goto bad;
1009	len = llvm_grab_name(pv->pv_name, tok);
1010	if (len == 0)
1011		goto bad;
1012
1013	while ((tok = strsep(buf, "\n")) != NULL) {
1014		if (strstr(tok, "{"))
1015			goto bad;
1016
1017		if (strstr(tok, "}"))
1018			break;
1019
1020		/* parse 'key = value' lines */
1021		if (strstr(tok, "=")) {
1022			SPLIT(v, tok, "=");
1023			GRAB_STR("id", v, tok, pv->pv_uuid, sizeof(pv->pv_uuid));
1024			GRAB_INT("pe_start", v, tok, pv->pv_start);
1025			GRAB_INT("pe_count", v, tok, pv->pv_count);
1026			continue;
1027		}
1028	}
1029	if (tok == NULL)
1030		goto bad;
1031	/* basic checking */
1032	if (pv->pv_count == 0)
1033		goto bad;
1034
1035	LIST_INSERT_HEAD(&vg->vg_pvs, pv, pv_next);
1036	G_LLVM_DEBUG(3, "pv: name=%s uuid=%s", pv->pv_name, pv->pv_uuid);
1037
1038	return (0);
1039bad:
1040	free(pv, M_GLLVM);
1041	return (-1);
1042}
1043
1044static int
1045llvm_textconf_decode_lv(char **buf, char *tok, struct g_llvm_vg *vg)
1046{
1047	struct g_llvm_lv	*lv;
1048	struct g_llvm_segment *sg;
1049	char *v;
1050	size_t len;
1051
1052	if (*buf == NULL || **buf == '\0')
1053		return (EINVAL);
1054
1055	lv = malloc(sizeof(*lv), M_GLLVM, M_NOWAIT|M_ZERO);
1056	if (lv == NULL)
1057		return (ENOMEM);
1058
1059	lv->lv_vg = vg;
1060	LIST_INIT(&lv->lv_segs);
1061
1062	if (tok == NULL)
1063		goto bad;
1064	len = llvm_grab_name(lv->lv_name, tok);
1065	if (len == 0)
1066		goto bad;
1067
1068	while ((tok = strsep(buf, "\n")) != NULL) {
1069		if (strstr(tok, "{")) {
1070			if (strstr(tok, "segment")) {
1071				llvm_textconf_decode_sg(buf, tok, lv);
1072				continue;
1073			} else
1074				/* unexpected section */
1075				goto bad;
1076		}
1077
1078		if (strstr(tok, "}"))
1079			break;
1080
1081		/* parse 'key = value' lines */
1082		if (strstr(tok, "=")) {
1083			SPLIT(v, tok, "=");
1084			GRAB_STR("id", v, tok, lv->lv_uuid, sizeof(lv->lv_uuid));
1085			GRAB_INT("segment_count", v, tok, lv->lv_sgcount);
1086			continue;
1087		}
1088	}
1089	if (tok == NULL)
1090		goto bad;
1091	if (lv->lv_sgcount == 0 || lv->lv_sgcount != lv->lv_numsegs)
1092		/* zero or incomplete segment list */
1093		goto bad;
1094
1095	/* Optimize for only one segment on the pv */
1096	lv->lv_firstsg = LIST_FIRST(&lv->lv_segs);
1097	LIST_INSERT_HEAD(&vg->vg_lvs, lv, lv_next);
1098	G_LLVM_DEBUG(3, "lv: name=%s uuid=%s", lv->lv_name, lv->lv_uuid);
1099
1100	return (0);
1101bad:
1102	while ((sg = LIST_FIRST(&lv->lv_segs)) != NULL) {
1103		LIST_REMOVE(sg, sg_next);
1104		free(sg, M_GLLVM);
1105	}
1106	free(lv, M_GLLVM);
1107	return (-1);
1108}
1109
1110static int
1111llvm_textconf_decode_sg(char **buf, char *tok, struct g_llvm_lv *lv)
1112{
1113	struct g_llvm_segment *sg;
1114	char *v;
1115	int count = 0;
1116
1117	if (*buf == NULL || **buf == '\0')
1118		return (EINVAL);
1119
1120	sg = malloc(sizeof(*sg), M_GLLVM, M_NOWAIT|M_ZERO);
1121	if (sg == NULL)
1122		return (ENOMEM);
1123
1124	while ((tok = strsep(buf, "\n")) != NULL) {
1125		/* only a single linear stripe is supported */
1126		if (strstr(tok, "stripe_count")) {
1127			SPLIT(v, tok, "=");
1128			GRAB_INT("stripe_count", v, tok, count);
1129			if (count != 1)
1130				goto bad;
1131		}
1132
1133		if (strstr(tok, "{"))
1134			goto bad;
1135
1136		if (strstr(tok, "}"))
1137			break;
1138
1139		if (strcmp(tok, "stripes = [") == 0) {
1140			tok = strsep(buf, "\n");
1141			if (tok == NULL)
1142				goto bad;
1143
1144			strsep(&tok, "\"");
1145			if (tok == NULL)
1146				goto bad;	/* missing open quotes */
1147			v = strsep(&tok, "\"");
1148			if (tok == NULL)
1149				goto bad;	/* missing close quotes */
1150			strncpy(sg->sg_pvname, v, sizeof(sg->sg_pvname));
1151			if (*tok != ',')
1152				goto bad;	/* missing comma for stripe */
1153			tok++;
1154
1155			sg->sg_pvstart = strtol(tok, &v, 10);
1156			if (v == tok)
1157				/* strtol did not eat any of the buffer */
1158				goto bad;
1159
1160			continue;
1161		}
1162
1163		/* parse 'key = value' lines */
1164		if (strstr(tok, "=")) {
1165			SPLIT(v, tok, "=");
1166			GRAB_INT("start_extent", v, tok, sg->sg_start);
1167			GRAB_INT("extent_count", v, tok, sg->sg_count);
1168			continue;
1169		}
1170	}
1171	if (tok == NULL)
1172		goto bad;
1173	/* basic checking */
1174	if (count != 1 || sg->sg_count == 0)
1175		goto bad;
1176
1177	sg->sg_end = sg->sg_start + sg->sg_count - 1;
1178	lv->lv_numsegs++;
1179	lv->lv_extentcount += sg->sg_count;
1180	LIST_INSERT_HEAD(&lv->lv_segs, sg, sg_next);
1181
1182	return (0);
1183bad:
1184	free(sg, M_GLLVM);
1185	return (-1);
1186}
1187#undef	GRAB_INT
1188#undef	GRAB_STR
1189#undef	SPLIT
1190
/*
 * GEOM class descriptor.  Only the class-wide methods are set here; the
 * per-geom methods (start, access, orphan, spoiled) are installed on each
 * geom by g_llvm_taste().
 */
static struct g_class g_llvm_class = {
	.name = G_LLVM_CLASS_NAME,
	.version = G_VERSION,
	.init = g_llvm_init,
	.taste = g_llvm_taste,
	.destroy_geom = g_llvm_destroy_geom
};

/* Register the class with GEOM and declare the module version. */
DECLARE_GEOM_CLASS(g_llvm_class, g_linux_lvm);
MODULE_VERSION(geom_linux_lvm, 0);
1201