1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36/*
37 * This is the method for dealing with BSD disklabels.  It has been
38 * extensively (by my standards at least) commented, in the vain hope that
39 * it will serve as the source in future copy&paste operations.
40 */
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: stable/11/sys/geom/geom_bsd.c 332640 2018-04-17 02:18:04Z kevans $");
44
45#include <sys/param.h>
46#include <sys/endian.h>
47#include <sys/systm.h>
48#include <sys/sysctl.h>
49#include <sys/kernel.h>
50#include <sys/fcntl.h>
51#include <sys/conf.h>
52#include <sys/bio.h>
53#include <sys/malloc.h>
54#include <sys/lock.h>
55#include <sys/mutex.h>
56#include <sys/md5.h>
57#include <sys/errno.h>
58#include <sys/disklabel.h>
59#include <sys/gpt.h>
60#include <sys/proc.h>
61#include <sys/sbuf.h>
62#include <sys/uuid.h>
63#include <geom/geom.h>
64#include <geom/geom_slice.h>
65
66FEATURE(geom_bsd, "GEOM BSD disklabels support");
67
68#define	BSD_CLASS_NAME "BSD"
69
70#define ALPHA_LABEL_OFFSET	64
71#define HISTORIC_LABEL_OFFSET	512
72
73#define LABELSIZE (148 + 16 * MAXPARTITIONS)
74
75static int g_bsd_once;
76
77static void g_bsd_hotwrite(void *arg, int flag);
78/*
79 * Our private data about one instance.  All the rest is handled by the
80 * slice code and stored in its softc, so this is just the stuff
81 * specific to BSD disklabels.
82 */
83struct g_bsd_softc {
84	off_t	labeloffset;
85	off_t	mbroffset;
86	off_t	rawoffset;
87	struct disklabel ondisk;
88	u_char	label[LABELSIZE];
89	u_char	labelsum[16];
90};
91
92/*
93 * Modify our slicer to match proposed disklabel, if possible.
94 * This is where we make sure we don't do something stupid.
95 */
96static int
97g_bsd_modify(struct g_geom *gp, u_char *label)
98{
99	int i, error;
100	struct partition *ppp;
101	struct g_slicer *gsp;
102	struct g_consumer *cp;
103	struct g_bsd_softc *ms;
104	u_int secsize, u;
105	off_t rawoffset, o;
106	struct disklabel dl;
107	MD5_CTX md5sum;
108
109	g_topology_assert();
110	gsp = gp->softc;
111	ms = gsp->softc;
112
113	error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS);
114	if (error) {
115		return (error);
116	}
117
118	/* Get dimensions of our device. */
119	cp = LIST_FIRST(&gp->consumer);
120	secsize = cp->provider->sectorsize;
121
122	/* ... or a smaller sector size. */
123	if (dl.d_secsize < secsize) {
124		return (EINVAL);
125	}
126
127	/* ... or a non-multiple sector size. */
128	if (dl.d_secsize % secsize != 0) {
129		return (EINVAL);
130	}
131
132	/* Historical braindamage... */
133	rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize;
134
135	for (i = 0; i < dl.d_npartitions; i++) {
136		ppp = &dl.d_partitions[i];
137		if (ppp->p_size == 0)
138			continue;
139	        o = (off_t)ppp->p_offset * dl.d_secsize;
140
141		if (o < rawoffset)
142			rawoffset = 0;
143	}
144
145	if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset)
146		printf("WARNING: %s expected rawoffset %jd, found %jd\n",
147		    gp->name,
148		    (intmax_t)ms->mbroffset/dl.d_secsize,
149		    (intmax_t)rawoffset/dl.d_secsize);
150
151	/* Don't munge open partitions. */
152	for (i = 0; i < dl.d_npartitions; i++) {
153		ppp = &dl.d_partitions[i];
154
155	        o = (off_t)ppp->p_offset * dl.d_secsize;
156		if (o == 0)
157			o = rawoffset;
158		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
159		    o - rawoffset,
160		    (off_t)ppp->p_size * dl.d_secsize,
161		     dl.d_secsize,
162		    "%s%c", gp->name, 'a' + i);
163		if (error)
164			return (error);
165	}
166
167	/* Look good, go for it... */
168	for (u = 0; u < gsp->nslice; u++) {
169		ppp = &dl.d_partitions[u];
170	        o = (off_t)ppp->p_offset * dl.d_secsize;
171		if (o == 0)
172			o = rawoffset;
173		g_slice_config(gp, u, G_SLICE_CONFIG_SET,
174		    o - rawoffset,
175		    (off_t)ppp->p_size * dl.d_secsize,
176		     dl.d_secsize,
177		    "%s%c", gp->name, 'a' + u);
178	}
179
180	/* Update our softc */
181	ms->ondisk = dl;
182	if (label != ms->label)
183		bcopy(label, ms->label, LABELSIZE);
184	ms->rawoffset = rawoffset;
185
186	/*
187	 * In order to avoid recursively attaching to the same
188	 * on-disk label (it's usually visible through the 'c'
189	 * partition) we calculate an MD5 and ask if other BSD's
190	 * below us love that label.  If they do, we don't.
191	 */
192	MD5Init(&md5sum);
193	MD5Update(&md5sum, ms->label, sizeof(ms->label));
194	MD5Final(ms->labelsum, &md5sum);
195
196	return (0);
197}
198
199/*
200 * This is an internal helper function, called multiple times from the taste
201 * function to try to locate a disklabel on the disk.  More civilized formats
202 * will not need this, as there is only one possible place on disk to look
203 * for the magic spot.
204 */
205
206static int
207g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset)
208{
209	int error;
210	u_char *buf;
211	struct disklabel *dl;
212	off_t secoff;
213
214	/*
215	 * We need to read entire aligned sectors, and we assume that the
216	 * disklabel does not span sectors, so one sector is enough.
217	 */
218	secoff = offset % secsize;
219	buf = g_read_data(cp, offset - secoff, secsize, NULL);
220	if (buf == NULL)
221		return (ENOENT);
222
223	/* Decode into our native format. */
224	dl = &ms->ondisk;
225	error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS);
226	if (!error)
227		bcopy(buf + secoff, ms->label, LABELSIZE);
228
229	/* Remember to free the buffer g_read_data() gave us. */
230	g_free(buf);
231
232	ms->labeloffset = offset;
233	return (error);
234}
235
236/*
237 * This function writes the current label to disk, possibly updating
238 * the alpha SRM checksum.
239 */
240
241static int
242g_bsd_writelabel(struct g_geom *gp, u_char *bootcode)
243{
244	off_t secoff;
245	u_int secsize;
246	struct g_consumer *cp;
247	struct g_slicer *gsp;
248	struct g_bsd_softc *ms;
249	u_char *buf;
250	uint64_t sum;
251	int error, i;
252
253	gsp = gp->softc;
254	ms = gsp->softc;
255	cp = LIST_FIRST(&gp->consumer);
256	/* Get sector size, we need it to read data. */
257	secsize = cp->provider->sectorsize;
258	secoff = ms->labeloffset % secsize;
259	if (bootcode == NULL) {
260		buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error);
261		if (buf == NULL)
262			return (error);
263		bcopy(ms->label, buf + secoff, sizeof(ms->label));
264	} else {
265		buf = bootcode;
266		bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label));
267	}
268	if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
269		sum = 0;
270		for (i = 0; i < 63; i++)
271			sum += le64dec(buf + i * 8);
272		le64enc(buf + 504, sum);
273	}
274	if (bootcode == NULL) {
275		error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize);
276		g_free(buf);
277	} else {
278		error = g_write_data(cp, 0, bootcode, BBSIZE);
279	}
280	return(error);
281}
282
283/*
284 * If the user tries to overwrite our disklabel through an open partition
285 * or via a magicwrite config call, we end up here and try to prevent
286 * footshooting as best we can.
287 */
288static void
289g_bsd_hotwrite(void *arg, int flag)
290{
291	struct bio *bp;
292	struct g_geom *gp;
293	struct g_slicer *gsp;
294	struct g_slice *gsl;
295	struct g_bsd_softc *ms;
296	u_char *p;
297	int error;
298
299	g_topology_assert();
300	/*
301	 * We should never get canceled, because that would amount to a removal
302	 * of the geom while there was outstanding I/O requests.
303	 */
304	KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled"));
305	bp = arg;
306	gp = bp->bio_to->geom;
307	gsp = gp->softc;
308	ms = gsp->softc;
309	gsl = &gsp->slices[bp->bio_to->index];
310	p = (u_char*)bp->bio_data + ms->labeloffset -
311	    (bp->bio_offset + gsl->offset);
312	error = g_bsd_modify(gp, p);
313	if (error) {
314		g_io_deliver(bp, EPERM);
315		return;
316	}
317	g_slice_finish_hot(bp);
318}
319
320static int
321g_bsd_start(struct bio *bp)
322{
323	struct g_geom *gp;
324	struct g_bsd_softc *ms;
325	struct g_slicer *gsp;
326
327	gp = bp->bio_to->geom;
328	gsp = gp->softc;
329	ms = gsp->softc;
330	if (bp->bio_cmd == BIO_GETATTR) {
331		if (g_handleattr(bp, "BSD::labelsum", ms->labelsum,
332		    sizeof(ms->labelsum)))
333			return (1);
334	}
335	return (0);
336}
337
338/*
339 * Dump configuration information in XML format.
340 * Notice that the function is called once for the geom and once for each
341 * consumer and provider.  We let g_slice_dumpconf() do most of the work.
342 */
343static void
344g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
345{
346	struct g_bsd_softc *ms;
347	struct g_slicer *gsp;
348
349	gsp = gp->softc;
350	ms = gsp->softc;
351	g_slice_dumpconf(sb, indent, gp, cp, pp);
352	if (indent != NULL && pp == NULL && cp == NULL) {
353		sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n",
354		    indent, (intmax_t)ms->labeloffset);
355		sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n",
356		    indent, (intmax_t)ms->rawoffset);
357		sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n",
358		    indent, (intmax_t)ms->mbroffset);
359	} else if (pp != NULL) {
360		if (indent == NULL)
361			sbuf_printf(sb, " ty %d",
362			    ms->ondisk.d_partitions[pp->index].p_fstype);
363		else
364			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
365			    ms->ondisk.d_partitions[pp->index].p_fstype);
366	}
367}
368
369/*
370 * The taste function is called from the event-handler, with the topology
371 * lock already held and a provider to examine.  The flags are unused.
372 *
373 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and
374 * if we find valid, consistent magic on it, build a geom on it.
375 *
376 * There may be cases where the operator would like to put a BSD-geom on
377 * providers which do not meet all of the requirements.  This can be done
378 * by instead passing the G_TF_INSIST flag, which will override these
379 * checks.
380 *
381 * The final flags value is G_TF_TRANSPARENT, which instructs the method
382 * to put a geom on top of the provider and configure it to be as transparent
383 * as possible.  This is not really relevant to the BSD method and therefore
384 * not implemented here.
385 */
386
387static struct uuid freebsd_slice = GPT_ENT_TYPE_FREEBSD;
388
389static struct g_geom *
390g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
391{
392	struct g_geom *gp;
393	struct g_consumer *cp;
394	int error, i;
395	struct g_bsd_softc *ms;
396	u_int secsize;
397	struct g_slicer *gsp;
398	u_char hash[16];
399	MD5_CTX md5sum;
400	struct uuid uuid;
401
402	g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
403	g_topology_assert();
404
405	/* We don't implement transparent inserts. */
406	if (flags == G_TF_TRANSPARENT)
407		return (NULL);
408
409	/*
410	 * BSD labels are a subclass of the general "slicing" topology so
411	 * a lot of the work can be done by the common "slice" code.
412	 * Create a geom with space for MAXPARTITIONS providers, one consumer
413	 * and a softc structure for us.  Specify the provider to attach
414	 * the consumer to and our "start" routine for special requests.
415	 * The provider is opened with mode (1,0,0) so we can do reads
416	 * from it.
417	 */
418	gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms,
419	     sizeof(*ms), g_bsd_start);
420	if (gp == NULL)
421		return (NULL);
422
423	/* Get the geom_slicer softc from the geom. */
424	gsp = gp->softc;
425
426	/*
427	 * The do...while loop here allows us to have multiple escapes
428	 * using a simple "break".  This improves code clarity without
429	 * ending up in deep nesting and without using goto or come from.
430	 */
431	do {
432		/*
433		 * If the provider is an MBR we will only auto attach
434		 * to type 165 slices in the G_TF_NORMAL case.  We will
435		 * attach to any other type.
436		 */
437		error = g_getattr("MBR::type", cp, &i);
438		if (!error) {
439			if (i != 165 && flags == G_TF_NORMAL)
440				break;
441			error = g_getattr("MBR::offset", cp, &ms->mbroffset);
442			if (error)
443				break;
444		}
445
446		/* Same thing if we are inside a PC98 */
447		error = g_getattr("PC98::type", cp, &i);
448		if (!error) {
449			if (i != 0xc494 && flags == G_TF_NORMAL)
450				break;
451			error = g_getattr("PC98::offset", cp, &ms->mbroffset);
452			if (error)
453				break;
454		}
455
456		/* Same thing if we are inside a GPT */
457		error = g_getattr("GPT::type", cp, &uuid);
458		if (!error) {
459			if (memcmp(&uuid, &freebsd_slice, sizeof(uuid)) != 0 &&
460			    flags == G_TF_NORMAL)
461				break;
462		}
463
464		/* Get sector size, we need it to read data. */
465		secsize = cp->provider->sectorsize;
466		if (secsize < 512)
467			break;
468
469		/* First look for a label at the start of the second sector. */
470		error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize);
471
472		/*
473		 * If sector size is not 512 the label still can be at
474		 * offset 512, not at the start of the second sector. At least
475		 * it's true for labels created by the FreeBSD's bsdlabel(8).
476		 */
477		if (error && secsize != HISTORIC_LABEL_OFFSET)
478			error = g_bsd_try(gp, gsp, cp, secsize, ms,
479			    HISTORIC_LABEL_OFFSET);
480
481		/* Next, look for alpha labels */
482		if (error)
483			error = g_bsd_try(gp, gsp, cp, secsize, ms,
484			    ALPHA_LABEL_OFFSET);
485
486		/* If we didn't find a label, punt. */
487		if (error)
488			break;
489
490		/*
491		 * In order to avoid recursively attaching to the same
492		 * on-disk label (it's usually visible through the 'c'
493		 * partition) we calculate an MD5 and ask if other BSD's
494		 * below us love that label.  If they do, we don't.
495		 */
496		MD5Init(&md5sum);
497		MD5Update(&md5sum, ms->label, sizeof(ms->label));
498		MD5Final(ms->labelsum, &md5sum);
499
500		error = g_getattr("BSD::labelsum", cp, &hash);
501		if (!error && !bcmp(ms->labelsum, hash, sizeof(hash)))
502			break;
503
504		/*
505		 * Process the found disklabel, and modify our "slice"
506		 * instance to match it, if possible.
507		 */
508		error = g_bsd_modify(gp, ms->label);
509	} while (0);
510
511	/* Success or failure, we can close our provider now. */
512	g_access(cp, -1, 0, 0);
513
514	/* If we have configured any providers, return the new geom. */
515	if (gsp->nprovider > 0) {
516		g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE,
517		    G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
518		gsp->hot = g_bsd_hotwrite;
519		if (!g_bsd_once) {
520			g_bsd_once = 1;
521			printf(
522			    "WARNING: geom_bsd (geom %s) is deprecated, "
523			    "use gpart instead.\n", gp->name);
524		}
525		return (gp);
526	}
527	/*
528	 * ...else push the "self-destruct" button, by spoiling our own
529	 * consumer.  This triggers a call to g_slice_spoiled which will
530	 * dismantle what was setup.
531	 */
532	g_slice_spoiled(cp);
533	return (NULL);
534}
535
536struct h0h0 {
537	struct g_geom *gp;
538	struct g_bsd_softc *ms;
539	u_char *label;
540	int error;
541};
542
543static void
544g_bsd_callconfig(void *arg, int flag)
545{
546	struct h0h0 *hp;
547
548	hp = arg;
549	hp->error = g_bsd_modify(hp->gp, hp->label);
550	if (!hp->error)
551		hp->error = g_bsd_writelabel(hp->gp, NULL);
552}
553
554/*
555 * NB! curthread is user process which GCTL'ed.
556 */
557static void
558g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
559{
560	u_char *label;
561	int error;
562	struct h0h0 h0h0;
563	struct g_geom *gp;
564	struct g_slicer *gsp;
565	struct g_consumer *cp;
566	struct g_bsd_softc *ms;
567
568	g_topology_assert();
569	gp = gctl_get_geom(req, mp, "geom");
570	if (gp == NULL)
571		return;
572	cp = LIST_FIRST(&gp->consumer);
573	gsp = gp->softc;
574	ms = gsp->softc;
575	if (!strcmp(verb, "read mbroffset")) {
576		gctl_set_param_err(req, "mbroffset", &ms->mbroffset,
577		    sizeof(ms->mbroffset));
578		return;
579	} else if (!strcmp(verb, "write label")) {
580		label = gctl_get_paraml(req, "label", LABELSIZE);
581		if (label == NULL)
582			return;
583		h0h0.gp = gp;
584		h0h0.ms = gsp->softc;
585		h0h0.label = label;
586		h0h0.error = -1;
587		/* XXX: Does this reference register with our selfdestruct code ? */
588		error = g_access(cp, 1, 1, 1);
589		if (error) {
590			gctl_error(req, "could not access consumer");
591			return;
592		}
593		g_bsd_callconfig(&h0h0, 0);
594		error = h0h0.error;
595		g_access(cp, -1, -1, -1);
596	} else if (!strcmp(verb, "write bootcode")) {
597		label = gctl_get_paraml(req, "bootcode", BBSIZE);
598		if (label == NULL)
599			return;
600		/* XXX: Does this reference register with our selfdestruct code ? */
601		error = g_access(cp, 1, 1, 1);
602		if (error) {
603			gctl_error(req, "could not access consumer");
604			return;
605		}
606		error = g_bsd_writelabel(gp, label);
607		g_access(cp, -1, -1, -1);
608	} else {
609		gctl_error(req, "Unknown verb parameter");
610	}
611
612	return;
613}
614
615/* Finally, register with GEOM infrastructure. */
616static struct g_class g_bsd_class = {
617	.name = BSD_CLASS_NAME,
618	.version = G_VERSION,
619	.taste = g_bsd_taste,
620	.ctlreq = g_bsd_config,
621	.dumpconf = g_bsd_dumpconf,
622};
623
624DECLARE_GEOM_CLASS(g_bsd_class, g_bsd);
625MODULE_VERSION(geom_bsd, 0);
626