geom_bsd.c revision 139778
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36/*
37 * This is the method for dealing with BSD disklabels.  It has been
38 * extensively (by my standards at least) commented, in the vain hope that
39 * it will serve as the source in future copy&paste operations.
40 */
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: head/sys/geom/geom_bsd.c 139778 2005-01-06 18:27:30Z imp $");
44
45#include <sys/param.h>
46#include <sys/endian.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/fcntl.h>
50#include <sys/conf.h>
51#include <sys/bio.h>
52#include <sys/malloc.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/md5.h>
56#include <sys/errno.h>
57#include <sys/disklabel.h>
58#include <geom/geom.h>
59#include <geom/geom_slice.h>
60
/* Name under which this class is registered with GEOM. */
#define	BSD_CLASS_NAME "BSD"

/*
 * Byte offset probed for an alpha-style label when no label is found
 * at the start of the second sector (see g_bsd_taste()).
 */
#define ALPHA_LABEL_OFFSET	64

/*
 * Size in bytes of the encoded (on-disk format) label we keep a copy of;
 * presumably a 148 byte fixed part plus 16 bytes per partition entry.
 */
#define LABELSIZE (148 + 16 * MAXPARTITIONS)

/* Hot-spot callback, defined further down; hooked up by g_bsd_taste(). */
static void g_bsd_hotwrite(void *arg, int flag);
68/*
69 * Our private data about one instance.  All the rest is handled by the
70 * slice code and stored in its softc, so this is just the stuff
71 * specific to BSD disklabels.
72 */
73struct g_bsd_softc {
74	off_t	labeloffset;
75	off_t	mbroffset;
76	off_t	rawoffset;
77	struct disklabel ondisk;
78	u_char	label[LABELSIZE];
79	u_char	labelsum[16];
80};
81
82/*
83 * Modify our slicer to match proposed disklabel, if possible.
84 * This is where we make sure we don't do something stupid.
85 */
86static int
87g_bsd_modify(struct g_geom *gp, u_char *label)
88{
89	int i, error;
90	struct partition *ppp;
91	struct g_slicer *gsp;
92	struct g_consumer *cp;
93	struct g_bsd_softc *ms;
94	u_int secsize, u;
95	off_t rawoffset, o;
96	struct disklabel dl;
97	MD5_CTX md5sum;
98
99	g_topology_assert();
100	gsp = gp->softc;
101	ms = gsp->softc;
102
103	error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS);
104	if (error) {
105		return (error);
106	}
107
108	/* Get dimensions of our device. */
109	cp = LIST_FIRST(&gp->consumer);
110	secsize = cp->provider->sectorsize;
111
112	/* ... or a smaller sector size. */
113	if (dl.d_secsize < secsize) {
114		return (EINVAL);
115	}
116
117	/* ... or a non-multiple sector size. */
118	if (dl.d_secsize % secsize != 0) {
119		return (EINVAL);
120	}
121
122	/* Historical braindamage... */
123	rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize;
124
125	for (i = 0; i < dl.d_npartitions; i++) {
126		ppp = &dl.d_partitions[i];
127		if (ppp->p_size == 0)
128			continue;
129	        o = (off_t)ppp->p_offset * dl.d_secsize;
130
131		if (o < rawoffset)
132			rawoffset = 0;
133	}
134
135	if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset)
136		printf("WARNING: Expected rawoffset %jd, found %jd\n",
137		    (intmax_t)ms->mbroffset/dl.d_secsize,
138		    (intmax_t)rawoffset/dl.d_secsize);
139
140	/* Don't munge open partitions. */
141	for (i = 0; i < dl.d_npartitions; i++) {
142		ppp = &dl.d_partitions[i];
143
144	        o = (off_t)ppp->p_offset * dl.d_secsize;
145		if (o == 0)
146			o = rawoffset;
147		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
148		    o - rawoffset,
149		    (off_t)ppp->p_size * dl.d_secsize,
150		     dl.d_secsize,
151		    "%s%c", gp->name, 'a' + i);
152		if (error)
153			return (error);
154	}
155
156	/* Look good, go for it... */
157	for (u = 0; u < gsp->nslice; u++) {
158		ppp = &dl.d_partitions[u];
159	        o = (off_t)ppp->p_offset * dl.d_secsize;
160		if (o == 0)
161			o = rawoffset;
162		g_slice_config(gp, u, G_SLICE_CONFIG_SET,
163		    o - rawoffset,
164		    (off_t)ppp->p_size * dl.d_secsize,
165		     dl.d_secsize,
166		    "%s%c", gp->name, 'a' + u);
167	}
168
169	/* Update our softc */
170	ms->ondisk = dl;
171	if (label != ms->label)
172		bcopy(label, ms->label, LABELSIZE);
173	ms->rawoffset = rawoffset;
174
175	/*
176	 * In order to avoid recursively attaching to the same
177	 * on-disk label (it's usually visible through the 'c'
178	 * partition) we calculate an MD5 and ask if other BSD's
179	 * below us love that label.  If they do, we don't.
180	 */
181	MD5Init(&md5sum);
182	MD5Update(&md5sum, ms->label, sizeof(ms->label));
183	MD5Final(ms->labelsum, &md5sum);
184
185	return (0);
186}
187
188/*
189 * This is an internal helper function, called multiple times from the taste
190 * function to try to locate a disklabel on the disk.  More civilized formats
191 * will not need this, as there is only one possible place on disk to look
192 * for the magic spot.
193 */
194
195static int
196g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset)
197{
198	int error;
199	u_char *buf;
200	struct disklabel *dl;
201	off_t secoff;
202
203	/*
204	 * We need to read entire aligned sectors, and we assume that the
205	 * disklabel does not span sectors, so one sector is enough.
206	 */
207	error = 0;
208	secoff = offset % secsize;
209	buf = g_read_data(cp, offset - secoff, secsize, &error);
210	if (buf == NULL || error != 0)
211		return (ENOENT);
212
213	/* Decode into our native format. */
214	dl = &ms->ondisk;
215	error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS);
216	if (!error)
217		bcopy(buf + secoff, ms->label, LABELSIZE);
218
219	/* Remember to free the buffer g_read_data() gave us. */
220	g_free(buf);
221
222	ms->labeloffset = offset;
223	return (error);
224}
225
226/*
227 * This function writes the current label to disk, possibly updating
228 * the alpha SRM checksum.
229 */
230
231static int
232g_bsd_writelabel(struct g_geom *gp, u_char *bootcode)
233{
234	off_t secoff;
235	u_int secsize;
236	struct g_consumer *cp;
237	struct g_slicer *gsp;
238	struct g_bsd_softc *ms;
239	u_char *buf;
240	uint64_t sum;
241	int error, i;
242
243	gsp = gp->softc;
244	ms = gsp->softc;
245	cp = LIST_FIRST(&gp->consumer);
246	/* Get sector size, we need it to read data. */
247	secsize = cp->provider->sectorsize;
248	secoff = ms->labeloffset % secsize;
249	if (bootcode == NULL) {
250		buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error);
251		if (buf == NULL || error != 0)
252			return (error);
253		bcopy(ms->label, buf + secoff, sizeof(ms->label));
254	} else {
255		buf = bootcode;
256		bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label));
257	}
258	if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
259		sum = 0;
260		for (i = 0; i < 63; i++)
261			sum += le64dec(buf + i * 8);
262		le64enc(buf + 504, sum);
263	}
264	if (bootcode == NULL) {
265		error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize);
266		g_free(buf);
267	} else {
268		error = g_write_data(cp, 0, bootcode, BBSIZE);
269	}
270	return(error);
271}
272
273/*
274 * If the user tries to overwrite our disklabel through an open partition
275 * or via a magicwrite config call, we end up here and try to prevent
276 * footshooting as best we can.
277 */
278static void
279g_bsd_hotwrite(void *arg, int flag)
280{
281	struct bio *bp;
282	struct g_geom *gp;
283	struct g_slicer *gsp;
284	struct g_slice *gsl;
285	struct g_bsd_softc *ms;
286	u_char *p;
287	int error;
288
289	g_topology_assert();
290	/*
291	 * We should never get canceled, because that would amount to a removal
292	 * of the geom while there was outstanding I/O requests.
293	 */
294	KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled"));
295	bp = arg;
296	gp = bp->bio_to->geom;
297	gsp = gp->softc;
298	ms = gsp->softc;
299	gsl = &gsp->slices[bp->bio_to->index];
300	p = (u_char*)bp->bio_data + ms->labeloffset
301	    - (bp->bio_offset + gsl->offset);
302	error = g_bsd_modify(gp, p);
303	if (error) {
304		g_io_deliver(bp, EPERM);
305		return;
306	}
307	g_slice_finish_hot(bp);
308}
309
/*-
 * NOTE: this comment describes the start routine, g_bsd_start(), which is
 * defined after the ioctl handler that immediately follows.
 *
 * The start routine is only called for non-trivial requests; all the
 * trivial ones are handled autonomously by the slice code.
 * For requests we handle there, we must call g_io_deliver() on the
 * bio, and return non-zero to indicate to the slice code that we did so.
 * That code executes in the "DOWN" I/O path, which means:
 *    * No sleeping.
 *    * Don't grab the topology lock.
 *    * Don't call biowait, g_getattr(), g_setattr() or g_read_data()
 */
320static int
321g_bsd_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td)
322{
323	struct g_geom *gp;
324	struct g_bsd_softc *ms;
325	struct g_slicer *gsp;
326	u_char *label;
327	int error;
328
329	gp = pp->geom;
330	gsp = gp->softc;
331	ms = gsp->softc;
332
333	switch(cmd) {
334	case DIOCGDINFO:
335		/* Return a copy of the disklabel to userland. */
336		bsd_disklabel_le_dec(ms->label, data, MAXPARTITIONS);
337		return(0);
338	case DIOCBSDBB: {
339		struct g_consumer *cp;
340		u_char *buf;
341		void *p;
342		int error, i;
343		uint64_t sum;
344
345		if (!(fflag & FWRITE))
346			return (EPERM);
347		/* The disklabel to set is the ioctl argument. */
348		buf = g_malloc(BBSIZE, M_WAITOK);
349		p = *(void **)data;
350		error = copyin(p, buf, BBSIZE);
351		if (!error) {
352			/* XXX: Rude, but supposedly safe */
353			DROP_GIANT();
354			g_topology_lock();
355			/* Validate and modify our slice instance to match. */
356			error = g_bsd_modify(gp, buf + ms->labeloffset);
357			if (!error) {
358				cp = LIST_FIRST(&gp->consumer);
359				if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
360					sum = 0;
361					for (i = 0; i < 63; i++)
362						sum += le64dec(buf + i * 8);
363					le64enc(buf + 504, sum);
364				}
365				error = g_write_data(cp, 0, buf, BBSIZE);
366			}
367			g_topology_unlock();
368			PICKUP_GIANT();
369		}
370		g_free(buf);
371		return (error);
372	}
373	case DIOCSDINFO:
374	case DIOCWDINFO: {
375		label = g_malloc(LABELSIZE, M_WAITOK);
376
377		if (!(fflag & FWRITE))
378			return (EPERM);
379		/* The disklabel to set is the ioctl argument. */
380		bsd_disklabel_le_enc(label, data);
381
382		DROP_GIANT();
383		g_topology_lock();
384		/* Validate and modify our slice instance to match. */
385		error = g_bsd_modify(gp, label);
386		if (error == 0 && cmd == DIOCWDINFO)
387			error = g_bsd_writelabel(gp, NULL);
388		g_topology_unlock();
389		PICKUP_GIANT();
390		g_free(label);
391		return(error);
392	}
393	default:
394		return (ENOIOCTL);
395	}
396}
397
398static int
399g_bsd_start(struct bio *bp)
400{
401	struct g_geom *gp;
402	struct g_bsd_softc *ms;
403	struct g_slicer *gsp;
404
405	gp = bp->bio_to->geom;
406	gsp = gp->softc;
407	ms = gsp->softc;
408	if (bp->bio_cmd == BIO_GETATTR) {
409		if (g_handleattr(bp, "BSD::labelsum", ms->labelsum,
410		    sizeof(ms->labelsum)))
411			return (1);
412	}
413	return (0);
414}
415
416/*
417 * Dump configuration information in XML format.
418 * Notice that the function is called once for the geom and once for each
419 * consumer and provider.  We let g_slice_dumpconf() do most of the work.
420 */
421static void
422g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
423{
424	struct g_bsd_softc *ms;
425	struct g_slicer *gsp;
426
427	gsp = gp->softc;
428	ms = gsp->softc;
429	g_slice_dumpconf(sb, indent, gp, cp, pp);
430	if (indent != NULL && pp == NULL && cp == NULL) {
431		sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n",
432		    indent, (intmax_t)ms->labeloffset);
433		sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n",
434		    indent, (intmax_t)ms->rawoffset);
435		sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n",
436		    indent, (intmax_t)ms->mbroffset);
437	} else if (pp != NULL) {
438		if (indent == NULL)
439			sbuf_printf(sb, " ty %d",
440			    ms->ondisk.d_partitions[pp->index].p_fstype);
441		else
442			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
443			    ms->ondisk.d_partitions[pp->index].p_fstype);
444	}
445}
446
447/*
448 * The taste function is called from the event-handler, with the topology
449 * lock already held and a provider to examine.  The flags are unused.
450 *
451 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and
452 * if we find valid, consistent magic on it, build a geom on it.
453 * any magic bits which indicate that we should automatically put a BSD
454 * geom on it.
455 *
456 * There may be cases where the operator would like to put a BSD-geom on
457 * providers which do not meet all of the requirements.  This can be done
458 * by instead passing the G_TF_INSIST flag, which will override these
459 * checks.
460 *
461 * The final flags value is G_TF_TRANSPARENT, which instructs the method
462 * to put a geom on top of the provider and configure it to be as transparent
463 * as possible.  This is not really relevant to the BSD method and therefore
464 * not implemented here.
465 */
466
467static struct g_geom *
468g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
469{
470	struct g_geom *gp;
471	struct g_consumer *cp;
472	int error, i;
473	struct g_bsd_softc *ms;
474	u_int secsize;
475	struct g_slicer *gsp;
476	u_char hash[16];
477	MD5_CTX md5sum;
478
479	g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
480	g_topology_assert();
481
482	/* We don't implement transparent inserts. */
483	if (flags == G_TF_TRANSPARENT)
484		return (NULL);
485
486	/*
487	 * BSD labels are a subclass of the general "slicing" topology so
488	 * a lot of the work can be done by the common "slice" code.
489	 * Create a geom with space for MAXPARTITIONS providers, one consumer
490	 * and a softc structure for us.  Specify the provider to attach
491	 * the consumer to and our "start" routine for special requests.
492	 * The provider is opened with mode (1,0,0) so we can do reads
493	 * from it.
494	 */
495	gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms,
496	     sizeof(*ms), g_bsd_start);
497	if (gp == NULL)
498		return (NULL);
499
500	/* Get the geom_slicer softc from the geom. */
501	gsp = gp->softc;
502
503	/*
504	 * The do...while loop here allows us to have multiple escapes
505	 * using a simple "break".  This improves code clarity without
506	 * ending up in deep nesting and without using goto or come from.
507	 */
508	do {
509		/*
510		 * If the provider is an MBR we will only auto attach
511		 * to type 165 slices in the G_TF_NORMAL case.  We will
512		 * attach to any other type.
513		 */
514		error = g_getattr("MBR::type", cp, &i);
515		if (!error) {
516			if (i != 165 && flags == G_TF_NORMAL)
517				break;
518			error = g_getattr("MBR::offset", cp, &ms->mbroffset);
519			if (error)
520				break;
521		}
522
523		/* Same thing if we are inside a PC98 */
524		error = g_getattr("PC98::type", cp, &i);
525		if (!error) {
526			if (i != 0xc494 && flags == G_TF_NORMAL)
527				break;
528			error = g_getattr("PC98::offset", cp, &ms->mbroffset);
529			if (error)
530				break;
531		}
532
533		/* Get sector size, we need it to read data. */
534		secsize = cp->provider->sectorsize;
535		if (secsize < 512)
536			break;
537
538		/* First look for a label at the start of the second sector. */
539		error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize);
540
541		/* Next, look for alpha labels */
542		if (error)
543			error = g_bsd_try(gp, gsp, cp, secsize, ms,
544			    ALPHA_LABEL_OFFSET);
545
546		/* If we didn't find a label, punt. */
547		if (error)
548			break;
549
550		/*
551		 * In order to avoid recursively attaching to the same
552		 * on-disk label (it's usually visible through the 'c'
553		 * partition) we calculate an MD5 and ask if other BSD's
554		 * below us love that label.  If they do, we don't.
555		 */
556		MD5Init(&md5sum);
557		MD5Update(&md5sum, ms->label, sizeof(ms->label));
558		MD5Final(ms->labelsum, &md5sum);
559
560		error = g_getattr("BSD::labelsum", cp, &hash);
561		if (!error && !bcmp(ms->labelsum, hash, sizeof(hash)))
562			break;
563
564		/*
565		 * Process the found disklabel, and modify our "slice"
566		 * instance to match it, if possible.
567		 */
568		error = g_bsd_modify(gp, ms->label);
569	} while (0);
570
571	/* Success or failure, we can close our provider now. */
572	g_access(cp, -1, 0, 0);
573
574	/* If we have configured any providers, return the new geom. */
575	if (gsp->nprovider > 0) {
576		g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE,
577		    G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
578		gsp->hot = g_bsd_hotwrite;
579		return (gp);
580	}
581	/*
582	 * ...else push the "self-destruct" button, by spoiling our own
583	 * consumer.  This triggers a call to g_slice_spoiled which will
584	 * dismantle what was setup.
585	 */
586	g_slice_spoiled(cp);
587	return (NULL);
588}
589
590struct h0h0 {
591	struct g_geom *gp;
592	struct g_bsd_softc *ms;
593	u_char *label;
594	int error;
595};
596
597static void
598g_bsd_callconfig(void *arg, int flag)
599{
600	struct h0h0 *hp;
601
602	hp = arg;
603	hp->error = g_bsd_modify(hp->gp, hp->label);
604	if (!hp->error)
605		hp->error = g_bsd_writelabel(hp->gp, NULL);
606}
607
608/*
609 * NB! curthread is user process which GCTL'ed.
610 */
611static void
612g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
613{
614	u_char *label;
615	int error;
616	struct h0h0 h0h0;
617	struct g_geom *gp;
618	struct g_slicer *gsp;
619	struct g_consumer *cp;
620	struct g_bsd_softc *ms;
621
622	g_topology_assert();
623	gp = gctl_get_geom(req, mp, "geom");
624	if (gp == NULL)
625		return;
626	cp = LIST_FIRST(&gp->consumer);
627	gsp = gp->softc;
628	ms = gsp->softc;
629	if (!strcmp(verb, "read mbroffset")) {
630		gctl_set_param(req, "mbroffset",
631		    &ms->mbroffset, sizeof(ms->mbroffset));
632		return;
633	} else if (!strcmp(verb, "write label")) {
634		label = gctl_get_paraml(req, "label", LABELSIZE);
635		if (label == NULL)
636			return;
637		h0h0.gp = gp;
638		h0h0.ms = gsp->softc;
639		h0h0.label = label;
640		h0h0.error = -1;
641		/* XXX: Does this reference register with our selfdestruct code ? */
642		error = g_access(cp, 1, 1, 1);
643		if (error) {
644			gctl_error(req, "could not access consumer");
645			return;
646		}
647		g_bsd_callconfig(&h0h0, 0);
648		error = h0h0.error;
649		g_access(cp, -1, -1, -1);
650	} else if (!strcmp(verb, "write bootcode")) {
651		label = gctl_get_paraml(req, "bootcode", BBSIZE);
652		if (label == NULL)
653			return;
654		/* XXX: Does this reference register with our selfdestruct code ? */
655		error = g_access(cp, 1, 1, 1);
656		if (error) {
657			gctl_error(req, "could not access consumer");
658			return;
659		}
660		error = g_bsd_writelabel(gp, label);
661		g_access(cp, -1, -1, -1);
662	} else {
663		gctl_error(req, "Unknown verb parameter");
664	}
665
666	return;
667}
668
669/* Finally, register with GEOM infrastructure. */
670static struct g_class g_bsd_class = {
671	.name = BSD_CLASS_NAME,
672	.version = G_VERSION,
673	.taste = g_bsd_taste,
674	.ctlreq = g_bsd_config,
675	.dumpconf = g_bsd_dumpconf,
676	.ioctl = g_bsd_ioctl,
677};
678
679DECLARE_GEOM_CLASS(g_bsd_class, g_bsd);
680