1/*-
2 * Copyright (c) 2007, 2008 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/diskmbr.h>
33#include <sys/endian.h>
34#include <sys/kernel.h>
35#include <sys/kobj.h>
36#include <sys/limits.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mutex.h>
40#include <sys/queue.h>
41#include <sys/sbuf.h>
42#include <sys/systm.h>
43#include <sys/sysctl.h>
44#include <geom/geom.h>
45#include <geom/geom_int.h>
46#include <geom/part/g_part.h>
47
48#include "g_part_if.h"
49
50FEATURE(geom_part_mbr, "GEOM partitioning class for MBR support");
51
52#define	MBRSIZE		512
53
54struct g_part_mbr_table {
55	struct g_part_table	base;
56	u_char		mbr[MBRSIZE];
57};
58
59struct g_part_mbr_entry {
60	struct g_part_entry	base;
61	struct dos_partition ent;
62};
63
64static int g_part_mbr_add(struct g_part_table *, struct g_part_entry *,
65    struct g_part_parms *);
66static int g_part_mbr_bootcode(struct g_part_table *, struct g_part_parms *);
67static int g_part_mbr_create(struct g_part_table *, struct g_part_parms *);
68static int g_part_mbr_destroy(struct g_part_table *, struct g_part_parms *);
69static void g_part_mbr_dumpconf(struct g_part_table *, struct g_part_entry *,
70    struct sbuf *, const char *);
71static int g_part_mbr_dumpto(struct g_part_table *, struct g_part_entry *);
72static int g_part_mbr_modify(struct g_part_table *, struct g_part_entry *,
73    struct g_part_parms *);
74static const char *g_part_mbr_name(struct g_part_table *, struct g_part_entry *,
75    char *, size_t);
76static int g_part_mbr_probe(struct g_part_table *, struct g_consumer *);
77static int g_part_mbr_read(struct g_part_table *, struct g_consumer *);
78static int g_part_mbr_setunset(struct g_part_table *, struct g_part_entry *,
79    const char *, unsigned int);
80static const char *g_part_mbr_type(struct g_part_table *, struct g_part_entry *,
81    char *, size_t);
82static int g_part_mbr_write(struct g_part_table *, struct g_consumer *);
83static int g_part_mbr_resize(struct g_part_table *, struct g_part_entry *,
84    struct g_part_parms *);
85
86static kobj_method_t g_part_mbr_methods[] = {
87	KOBJMETHOD(g_part_add,		g_part_mbr_add),
88	KOBJMETHOD(g_part_bootcode,	g_part_mbr_bootcode),
89	KOBJMETHOD(g_part_create,	g_part_mbr_create),
90	KOBJMETHOD(g_part_destroy,	g_part_mbr_destroy),
91	KOBJMETHOD(g_part_dumpconf,	g_part_mbr_dumpconf),
92	KOBJMETHOD(g_part_dumpto,	g_part_mbr_dumpto),
93	KOBJMETHOD(g_part_modify,	g_part_mbr_modify),
94	KOBJMETHOD(g_part_resize,	g_part_mbr_resize),
95	KOBJMETHOD(g_part_name,		g_part_mbr_name),
96	KOBJMETHOD(g_part_probe,	g_part_mbr_probe),
97	KOBJMETHOD(g_part_read,		g_part_mbr_read),
98	KOBJMETHOD(g_part_setunset,	g_part_mbr_setunset),
99	KOBJMETHOD(g_part_type,		g_part_mbr_type),
100	KOBJMETHOD(g_part_write,	g_part_mbr_write),
101	{ 0, 0 }
102};
103
104static struct g_part_scheme g_part_mbr_scheme = {
105	"MBR",
106	g_part_mbr_methods,
107	sizeof(struct g_part_mbr_table),
108	.gps_entrysz = sizeof(struct g_part_mbr_entry),
109	.gps_minent = NDOSPART,
110	.gps_maxent = NDOSPART,
111	.gps_bootcodesz = MBRSIZE,
112};
113G_PART_SCHEME_DECLARE(g_part_mbr);
114
115static struct g_part_mbr_alias {
116	u_char		typ;
117	int		alias;
118} mbr_alias_match[] = {
119	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
120	{ DOSPTYP_EXT,		G_PART_ALIAS_EBR },
121	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
122	{ DOSPTYP_FAT16,	G_PART_ALIAS_MS_FAT16 },
123	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
124	{ DOSPTYP_EXTLBA,	G_PART_ALIAS_EBR },
125	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
126	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
127	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
128	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
129	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
130	{ DOSPTYP_VMFS,		G_PART_ALIAS_VMFS },
131	{ DOSPTYP_VMKDIAG,	G_PART_ALIAS_VMKDIAG },
132};
133
134static int
135mbr_parse_type(const char *type, u_char *dp_typ)
136{
137	const char *alias;
138	char *endp;
139	long lt;
140	int i;
141
142	if (type[0] == '!') {
143		lt = strtol(type + 1, &endp, 0);
144		if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256)
145			return (EINVAL);
146		*dp_typ = (u_char)lt;
147		return (0);
148	}
149	for (i = 0;
150	    i < sizeof(mbr_alias_match) / sizeof(mbr_alias_match[0]); i++) {
151		alias = g_part_alias_name(mbr_alias_match[i].alias);
152		if (strcasecmp(type, alias) == 0) {
153			*dp_typ = mbr_alias_match[i].typ;
154			return (0);
155		}
156	}
157	return (EINVAL);
158}
159
160static int
161mbr_probe_bpb(u_char *bpb)
162{
163	uint16_t secsz;
164	uint8_t clstsz;
165
166#define PO2(x)	((x & (x - 1)) == 0)
167	secsz = le16dec(bpb);
168	if (secsz < 512 || secsz > 4096 || !PO2(secsz))
169		return (0);
170	clstsz = bpb[2];
171	if (clstsz < 1 || clstsz > 128 || !PO2(clstsz))
172		return (0);
173#undef PO2
174
175	return (1);
176}
177
178static void
179mbr_set_chs(struct g_part_table *table, uint32_t lba, u_char *cylp, u_char *hdp,
180    u_char *secp)
181{
182	uint32_t cyl, hd, sec;
183
184	sec = lba % table->gpt_sectors + 1;
185	lba /= table->gpt_sectors;
186	hd = lba % table->gpt_heads;
187	lba /= table->gpt_heads;
188	cyl = lba;
189	if (cyl > 1023)
190		sec = hd = cyl = ~0;
191
192	*cylp = cyl & 0xff;
193	*hdp = hd & 0xff;
194	*secp = (sec & 0x3f) | ((cyl >> 2) & 0xc0);
195}
196
197static int
198mbr_align(struct g_part_table *basetable, uint32_t *start, uint32_t *size)
199{
200	uint32_t sectors;
201
202	sectors = basetable->gpt_sectors;
203	if (*size < sectors)
204		return (EINVAL);
205	if (start != NULL && (*start % sectors)) {
206		*size += (*start % sectors) - sectors;
207		*start -= (*start % sectors) - sectors;
208	}
209	if (*size % sectors)
210		*size -= (*size % sectors);
211	if (*size < sectors)
212		return (EINVAL);
213	return (0);
214}
215
216static int
217g_part_mbr_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
218    struct g_part_parms *gpp)
219{
220	struct g_part_mbr_entry *entry;
221	uint32_t start, size;
222
223	if (gpp->gpp_parms & G_PART_PARM_LABEL)
224		return (EINVAL);
225
226	entry = (struct g_part_mbr_entry *)baseentry;
227	start = gpp->gpp_start;
228	size = gpp->gpp_size;
229	if (mbr_align(basetable, &start, &size) != 0)
230		return (EINVAL);
231	if (baseentry->gpe_deleted)
232		bzero(&entry->ent, sizeof(entry->ent));
233
234	KASSERT(baseentry->gpe_start <= start, ("%s", __func__));
235	KASSERT(baseentry->gpe_end >= start + size - 1, ("%s", __func__));
236	baseentry->gpe_start = start;
237	baseentry->gpe_end = start + size - 1;
238	entry->ent.dp_start = start;
239	entry->ent.dp_size = size;
240	mbr_set_chs(basetable, baseentry->gpe_start, &entry->ent.dp_scyl,
241	    &entry->ent.dp_shd, &entry->ent.dp_ssect);
242	mbr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl,
243	    &entry->ent.dp_ehd, &entry->ent.dp_esect);
244	return (mbr_parse_type(gpp->gpp_type, &entry->ent.dp_typ));
245}
246
247static int
248g_part_mbr_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
249{
250	struct g_part_mbr_table *table;
251	uint32_t dsn;
252
253	if (gpp->gpp_codesize != MBRSIZE)
254		return (ENODEV);
255
256	table = (struct g_part_mbr_table *)basetable;
257	dsn = *(uint32_t *)(table->mbr + DOSDSNOFF);
258	bcopy(gpp->gpp_codeptr, table->mbr, DOSPARTOFF);
259	if (dsn != 0)
260		*(uint32_t *)(table->mbr + DOSDSNOFF) = dsn;
261	return (0);
262}
263
264static int
265g_part_mbr_create(struct g_part_table *basetable, struct g_part_parms *gpp)
266{
267	struct g_provider *pp;
268	struct g_part_mbr_table *table;
269
270	pp = gpp->gpp_provider;
271	if (pp->sectorsize < MBRSIZE)
272		return (ENOSPC);
273
274	basetable->gpt_first = basetable->gpt_sectors;
275	basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize,
276	    UINT32_MAX) - 1;
277
278	table = (struct g_part_mbr_table *)basetable;
279	le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC);
280	return (0);
281}
282
283static int
284g_part_mbr_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
285{
286
287	/* Wipe the first sector to clear the partitioning. */
288	basetable->gpt_smhead |= 1;
289	return (0);
290}
291
292static void
293g_part_mbr_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry,
294    struct sbuf *sb, const char *indent)
295{
296	struct g_part_mbr_entry *entry;
297
298	entry = (struct g_part_mbr_entry *)baseentry;
299	if (indent == NULL) {
300		/* conftxt: libdisk compatibility */
301		sbuf_printf(sb, " xs MBR xt %u", entry->ent.dp_typ);
302	} else if (entry != NULL) {
303		/* confxml: partition entry information */
304		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
305		    entry->ent.dp_typ);
306		if (entry->ent.dp_flag & 0x80)
307			sbuf_printf(sb, "%s<attrib>active</attrib>\n", indent);
308	} else {
309		/* confxml: scheme information */
310	}
311}
312
313static int
314g_part_mbr_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
315{
316	struct g_part_mbr_entry *entry;
317
318	/* Allow dumping to a FreeBSD partition or Linux swap partition only. */
319	entry = (struct g_part_mbr_entry *)baseentry;
320	return ((entry->ent.dp_typ == DOSPTYP_386BSD ||
321	    entry->ent.dp_typ == DOSPTYP_LINSWP) ? 1 : 0);
322}
323
324static int
325g_part_mbr_modify(struct g_part_table *basetable,
326    struct g_part_entry *baseentry, struct g_part_parms *gpp)
327{
328	struct g_part_mbr_entry *entry;
329
330	if (gpp->gpp_parms & G_PART_PARM_LABEL)
331		return (EINVAL);
332
333	entry = (struct g_part_mbr_entry *)baseentry;
334	if (gpp->gpp_parms & G_PART_PARM_TYPE)
335		return (mbr_parse_type(gpp->gpp_type, &entry->ent.dp_typ));
336	return (0);
337}
338
339static int
340g_part_mbr_resize(struct g_part_table *basetable,
341    struct g_part_entry *baseentry, struct g_part_parms *gpp)
342{
343	struct g_part_mbr_entry *entry;
344	struct g_provider *pp;
345	uint32_t size;
346
347	size = gpp->gpp_size;
348	if (mbr_align(basetable, NULL, &size) != 0)
349		return (EINVAL);
350	/* XXX: prevent unexpected shrinking. */
351	pp = baseentry->gpe_pp;
352	if ((g_debugflags & 0x10) == 0 && size < gpp->gpp_size &&
353	    pp->mediasize / pp->sectorsize > size)
354		return (EBUSY);
355	entry = (struct g_part_mbr_entry *)baseentry;
356	baseentry->gpe_end = baseentry->gpe_start + size - 1;
357	entry->ent.dp_size = size;
358	mbr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl,
359	    &entry->ent.dp_ehd, &entry->ent.dp_esect);
360	return (0);
361}
362
363static const char *
364g_part_mbr_name(struct g_part_table *table, struct g_part_entry *baseentry,
365    char *buf, size_t bufsz)
366{
367
368	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
369	return (buf);
370}
371
372static int
373g_part_mbr_probe(struct g_part_table *table, struct g_consumer *cp)
374{
375	char psn[8];
376	struct g_provider *pp;
377	u_char *buf, *p;
378	int error, index, res, sum;
379	uint16_t magic;
380
381	pp = cp->provider;
382
383	/* Sanity-check the provider. */
384	if (pp->sectorsize < MBRSIZE || pp->mediasize < pp->sectorsize)
385		return (ENOSPC);
386	if (pp->sectorsize > 4096)
387		return (ENXIO);
388
389	/* We don't nest under an MBR (see EBR instead). */
390	error = g_getattr("PART::scheme", cp, &psn);
391	if (error == 0 && strcmp(psn, g_part_mbr_scheme.name) == 0)
392		return (ELOOP);
393
394	/* Check that there's a MBR. */
395	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
396	if (buf == NULL)
397		return (error);
398
399	/* We goto out on mismatch. */
400	res = ENXIO;
401
402	magic = le16dec(buf + DOSMAGICOFFSET);
403	if (magic != DOSMAGIC)
404		goto out;
405
406	for (index = 0; index < NDOSPART; index++) {
407		p = buf + DOSPARTOFF + index * DOSPARTSIZE;
408		if (p[0] != 0 && p[0] != 0x80)
409			goto out;
410	}
411
412	/*
413	 * If the partition table does not consist of all zeroes,
414	 * assume we have a MBR. If it's all zeroes, we could have
415	 * a boot sector. For example, a boot sector that doesn't
416	 * have boot code -- common on non-i386 hardware. In that
417	 * case we check if we have a possible BPB. If so, then we
418	 * assume we have a boot sector instead.
419	 */
420	sum = 0;
421	for (index = 0; index < NDOSPART * DOSPARTSIZE; index++)
422		sum += buf[DOSPARTOFF + index];
423	if (sum != 0 || !mbr_probe_bpb(buf + 0x0b))
424		res = G_PART_PROBE_PRI_NORM;
425
426 out:
427	g_free(buf);
428	return (res);
429}
430
431static int
432g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp)
433{
434	struct dos_partition ent;
435	struct g_provider *pp;
436	struct g_part_mbr_table *table;
437	struct g_part_mbr_entry *entry;
438	u_char *buf, *p;
439	off_t chs, msize, first;
440	u_int sectors, heads;
441	int error, index;
442
443	pp = cp->provider;
444	table = (struct g_part_mbr_table *)basetable;
445	first = basetable->gpt_sectors;
446	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
447
448	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
449	if (buf == NULL)
450		return (error);
451
452	bcopy(buf, table->mbr, sizeof(table->mbr));
453	for (index = NDOSPART - 1; index >= 0; index--) {
454		p = buf + DOSPARTOFF + index * DOSPARTSIZE;
455		ent.dp_flag = p[0];
456		ent.dp_shd = p[1];
457		ent.dp_ssect = p[2];
458		ent.dp_scyl = p[3];
459		ent.dp_typ = p[4];
460		ent.dp_ehd = p[5];
461		ent.dp_esect = p[6];
462		ent.dp_ecyl = p[7];
463		ent.dp_start = le32dec(p + 8);
464		ent.dp_size = le32dec(p + 12);
465		if (ent.dp_typ == 0 || ent.dp_typ == DOSPTYP_PMBR)
466			continue;
467		if (ent.dp_start == 0 || ent.dp_size == 0)
468			continue;
469		sectors = ent.dp_esect & 0x3f;
470		if (sectors > basetable->gpt_sectors &&
471		    !basetable->gpt_fixgeom) {
472			g_part_geometry_heads(msize, sectors, &chs, &heads);
473			if (chs != 0) {
474				basetable->gpt_sectors = sectors;
475				basetable->gpt_heads = heads;
476			}
477		}
478		if (ent.dp_start < first)
479			first = ent.dp_start;
480		entry = (struct g_part_mbr_entry *)g_part_new_entry(basetable,
481		    index + 1, ent.dp_start, ent.dp_start + ent.dp_size - 1);
482		entry->ent = ent;
483	}
484
485	basetable->gpt_entries = NDOSPART;
486	basetable->gpt_first = basetable->gpt_sectors;
487	basetable->gpt_last = msize - 1;
488
489	if (first < basetable->gpt_first)
490		basetable->gpt_first = 1;
491
492	g_free(buf);
493	return (0);
494}
495
496static int
497g_part_mbr_setunset(struct g_part_table *table, struct g_part_entry *baseentry,
498    const char *attrib, unsigned int set)
499{
500	struct g_part_entry *iter;
501	struct g_part_mbr_entry *entry;
502	int changed;
503
504	if (baseentry == NULL)
505		return (ENODEV);
506	if (strcasecmp(attrib, "active") != 0)
507		return (EINVAL);
508
509	/* Only one entry can have the active attribute. */
510	LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) {
511		if (iter->gpe_deleted)
512			continue;
513		changed = 0;
514		entry = (struct g_part_mbr_entry *)iter;
515		if (iter == baseentry) {
516			if (set && (entry->ent.dp_flag & 0x80) == 0) {
517				entry->ent.dp_flag |= 0x80;
518				changed = 1;
519			} else if (!set && (entry->ent.dp_flag & 0x80)) {
520				entry->ent.dp_flag &= ~0x80;
521				changed = 1;
522			}
523		} else {
524			if (set && (entry->ent.dp_flag & 0x80)) {
525				entry->ent.dp_flag &= ~0x80;
526				changed = 1;
527			}
528		}
529		if (changed && !iter->gpe_created)
530			iter->gpe_modified = 1;
531	}
532	return (0);
533}
534
535static const char *
536g_part_mbr_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
537    char *buf, size_t bufsz)
538{
539	struct g_part_mbr_entry *entry;
540	int i;
541
542	entry = (struct g_part_mbr_entry *)baseentry;
543	for (i = 0;
544	    i < sizeof(mbr_alias_match) / sizeof(mbr_alias_match[0]); i++) {
545		if (mbr_alias_match[i].typ == entry->ent.dp_typ)
546			return (g_part_alias_name(mbr_alias_match[i].alias));
547	}
548	snprintf(buf, bufsz, "!%d", entry->ent.dp_typ);
549	return (buf);
550}
551
552static int
553g_part_mbr_write(struct g_part_table *basetable, struct g_consumer *cp)
554{
555	struct g_part_entry *baseentry;
556	struct g_part_mbr_entry *entry;
557	struct g_part_mbr_table *table;
558	u_char *p;
559	int error, index;
560
561	table = (struct g_part_mbr_table *)basetable;
562	baseentry = LIST_FIRST(&basetable->gpt_entry);
563	for (index = 1; index <= basetable->gpt_entries; index++) {
564		p = table->mbr + DOSPARTOFF + (index - 1) * DOSPARTSIZE;
565		entry = (baseentry != NULL && index == baseentry->gpe_index)
566		    ? (struct g_part_mbr_entry *)baseentry : NULL;
567		if (entry != NULL && !baseentry->gpe_deleted) {
568			p[0] = entry->ent.dp_flag;
569			p[1] = entry->ent.dp_shd;
570			p[2] = entry->ent.dp_ssect;
571			p[3] = entry->ent.dp_scyl;
572			p[4] = entry->ent.dp_typ;
573			p[5] = entry->ent.dp_ehd;
574			p[6] = entry->ent.dp_esect;
575			p[7] = entry->ent.dp_ecyl;
576			le32enc(p + 8, entry->ent.dp_start);
577			le32enc(p + 12, entry->ent.dp_size);
578		} else
579			bzero(p, DOSPARTSIZE);
580
581		if (entry != NULL)
582			baseentry = LIST_NEXT(baseentry, gpe_entry);
583	}
584
585	error = g_write_data(cp, 0, table->mbr, cp->provider->sectorsize);
586	return (error);
587}
588