disk.c revision 241809
1/*-
2 * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/boot/common/disk.c 241809 2012-10-21 12:14:58Z ae $");
30
31#include <sys/disk.h>
32#include <sys/queue.h>
33#include <stand.h>
34#include <stdarg.h>
35#include <bootstrap.h>
36#include <part.h>
37
38#include "disk.h"
39
/*
 * DEBUG(fmt, ...) - diagnostic trace helper.
 *
 * When DISK_DEBUG is defined the message is printed with the name of the
 * calling function in front of it and a newline after it.  Otherwise the
 * macro expands to nothing, so its arguments are never evaluated.
 */
#if defined(DISK_DEBUG)
# define DEBUG(fmt, ...)	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__)
#else
# define DEBUG(fmt, ...)
#endif
45
/*
 * State kept for an opened disk.  It is reached through the d_opendata
 * member of a disk_devdesc and may be shared by several cache entries.
 */
struct open_disk {
	struct ptable	*table;		/* main partition table of the disk */
	off_t		mediasize;	/* value given to disk_open() */
	u_int		sectorsize;	/* value given to disk_open() */
	u_int		flags;		/* flags given to disk_open() */
	int		rcnt;		/* cache entries that refer to this */
};
53
/* Context passed to ptable_print() by way of ptable_iterate(). */
struct print_args {
	struct disk_devdesc *dev;	/* disk whose tables are listed */
	const char	*prefix;	/* text put in front of each name */
	int		verbose;	/* non-zero: print sizes as well */
};
59
/*
 * One cache entry: the (device, unit, slice, partition) tuple that was
 * requested from disk_open(), the open_disk it resolved to and the offset
 * that was computed for it.
 */
struct dentry {
	const struct devsw	*d_dev;
	int			d_unit;
	int			d_slice;
	int			d_partition;

	struct open_disk	*od;
	off_t			d_offset;
	STAILQ_ENTRY(dentry)	entry;
#ifdef DISK_DEBUG
	uint32_t		count;	/* usage counter for dcachestat */
#endif
};

/* All cache entries; new ones are appended at the tail. */
static STAILQ_HEAD(, dentry) opened_disks =
    STAILQ_HEAD_INITIALIZER(opened_disks);
76
77static int
78disk_lookup(struct disk_devdesc *dev)
79{
80	struct dentry *entry;
81	int rc;
82
83	rc = ENOENT;
84	STAILQ_FOREACH(entry, &opened_disks, entry) {
85		if (entry->d_dev != dev->d_dev ||
86		    entry->d_unit != dev->d_unit)
87			continue;
88		dev->d_opendata = entry->od;
89		if (entry->d_slice == dev->d_slice &&
90		    entry->d_partition == dev->d_partition) {
91			dev->d_offset = entry->d_offset;
92			DEBUG("%s offset %lld", disk_fmtdev(dev),
93			    dev->d_offset);
94#ifdef DISK_DEBUG
95			entry->count++;
96#endif
97			return (0);
98		}
99		rc = EAGAIN;
100	}
101	return (rc);
102}
103
104static void
105disk_insert(struct disk_devdesc *dev)
106{
107	struct dentry *entry;
108
109	entry = (struct dentry *)malloc(sizeof(struct dentry));
110	if (entry == NULL) {
111		DEBUG("no memory");
112		return;
113	}
114	entry->d_dev = dev->d_dev;
115	entry->d_unit = dev->d_unit;
116	entry->d_slice = dev->d_slice;
117	entry->d_partition = dev->d_partition;
118	entry->od = (struct open_disk *)dev->d_opendata;
119	entry->od->rcnt++;
120	entry->d_offset = dev->d_offset;
121#ifdef DISK_DEBUG
122	entry->count = 1;
123#endif
124	STAILQ_INSERT_TAIL(&opened_disks, entry, entry);
125	DEBUG("%s cached", disk_fmtdev(dev));
126}
127
#ifdef DISK_DEBUG
COMMAND_SET(dcachestat, "dcachestat", "get disk cache stats",
    command_dcachestat);

/*
 * "dcachestat" command: print one line per cache entry with its device
 * name, usage counter, open_disk pointer and that open_disk's reference
 * count.  Always returns CMD_OK.
 */
static int
command_dcachestat(int argc, char *argv[])
{
	struct disk_devdesc desc;
	struct dentry *de;

	for (de = STAILQ_FIRST(&opened_disks); de != NULL;
	    de = STAILQ_NEXT(de, entry)) {
		/* Only the fields read by disk_fmtdev() are filled in. */
		desc.d_dev = (struct devsw *)de->d_dev;
		desc.d_unit = de->d_unit;
		desc.d_slice = de->d_slice;
		desc.d_partition = de->d_partition;
		printf("%s %d => %p [%d]\n", disk_fmtdev(&desc), de->count,
		    de->od, de->od->rcnt);
	}
	return (CMD_OK);
}
#endif /* DISK_DEBUG */
149
/*
 * Format a size given in sectors as a short human-readable string.
 *
 * The size is first converted to kilobytes and then scaled to the largest
 * of K, M, G or T that still leaves a reasonably large integer part.  The
 * result lives in a static buffer that is overwritten by the next call.
 */
static char *
display_size(uint64_t size, u_int sectorsize)
{
	static char buf[80];
	uint64_t kb;
	char unit;

	kb = size * sectorsize / 1024;
	if (kb < 10000) {
		unit = 'K';
	} else if (kb < 10240000) {
		kb /= 1024;
		unit = 'M';
	} else if (kb < 10485760000LL) {
		kb /= 1048576;
		unit = 'G';
	} else {
		kb /= 1073741824;
		unit = 'T';
	}
	sprintf(buf, "%ld%cB", (long)kb, unit);
	return (buf);
}
172
173static int
174ptblread(void *d, void *buf, size_t blocks, off_t offset)
175{
176	struct disk_devdesc *dev;
177	struct open_disk *od;
178
179	dev = (struct disk_devdesc *)d;
180	od = (struct open_disk *)dev->d_opendata;
181	return (dev->d_dev->dv_strategy(dev, F_READ, offset,
182	    blocks * od->sectorsize, (char *)buf, NULL));
183}
184
185#define	PWIDTH	35
186static void
187ptable_print(void *arg, const char *pname, const struct ptable_entry *part)
188{
189	struct print_args *pa, bsd;
190	struct open_disk *od;
191	struct ptable *table;
192	char line[80];
193
194	pa = (struct print_args *)arg;
195	od = (struct open_disk *)pa->dev->d_opendata;
196	sprintf(line, "  %s%s: %s", pa->prefix, pname,
197	    parttype2str(part->type));
198	if (pa->verbose)
199		sprintf(line, "%-*s%s", PWIDTH, line,
200		    display_size(part->end - part->start + 1,
201		    od->sectorsize));
202	strcat(line, "\n");
203	pager_output(line);
204	if (part->type == PART_FREEBSD) {
205		/* Open slice with BSD label */
206		pa->dev->d_offset = part->start;
207		table = ptable_open(pa->dev, part->end - part->start + 1,
208		    od->sectorsize, ptblread);
209		if (table == NULL)
210			return;
211		sprintf(line, "  %s%s", pa->prefix, pname);
212		bsd.dev = pa->dev;
213		bsd.prefix = line;
214		bsd.verbose = pa->verbose;
215		ptable_iterate(table, &bsd, ptable_print);
216		ptable_close(table);
217	}
218}
219#undef PWIDTH
220
221void
222disk_print(struct disk_devdesc *dev, char *prefix, int verbose)
223{
224	struct open_disk *od;
225	struct print_args pa;
226
227	/* Disk should be opened */
228	od = (struct open_disk *)dev->d_opendata;
229	pa.dev = dev;
230	pa.prefix = prefix;
231	pa.verbose = verbose;
232	ptable_iterate(od->table, &pa, ptable_print);
233}
234
235int
236disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize,
237    u_int flags)
238{
239	struct open_disk *od;
240	struct ptable *table;
241	struct ptable_entry part;
242	int rc, slice, partition;
243
244	rc = 0;
245	if ((flags & DISK_F_NOCACHE) == 0) {
246		rc = disk_lookup(dev);
247		if (rc == 0)
248			return (0);
249	}
250	/*
251	 * While we are reading disk metadata, make sure we do it relative
252	 * to the start of the disk
253	 */
254	dev->d_offset = 0;
255	table = NULL;
256	slice = dev->d_slice;
257	partition = dev->d_partition;
258	if (rc == EAGAIN) {
259		/*
260		 * This entire disk was already opened and there is no
261		 * need to allocate new open_disk structure and open the
262		 * main partition table.
263		 */
264		od = (struct open_disk *)dev->d_opendata;
265		DEBUG("%s unit %d, slice %d, partition %d => %p (cached)",
266		    disk_fmtdev(dev), dev->d_unit, dev->d_slice,
267		    dev->d_partition, od);
268		goto opened;
269	} else {
270		od = (struct open_disk *)malloc(sizeof(struct open_disk));
271		if (od == NULL) {
272			DEBUG("no memory");
273			return (ENOMEM);
274		}
275		dev->d_opendata = od;
276		od->rcnt = 0;
277	}
278	od->mediasize = mediasize;
279	od->sectorsize = sectorsize;
280	od->flags = flags;
281	DEBUG("%s unit %d, slice %d, partition %d => %p",
282	    disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition, od);
283
284	/* Determine disk layout. */
285	od->table = ptable_open(dev, mediasize / sectorsize, sectorsize,
286	    ptblread);
287	if (od->table == NULL) {
288		DEBUG("Can't read partition table");
289		rc = ENXIO;
290		goto out;
291	}
292opened:
293	rc = 0;
294	if (ptable_gettype(od->table) == PTABLE_BSD &&
295	    partition >= 0) {
296		/* It doesn't matter what value has d_slice */
297		rc = ptable_getpart(od->table, &part, partition);
298		if (rc == 0)
299			dev->d_offset = part.start;
300	} else if (slice >= 0) {
301		/* Try to get information about partition */
302		if (slice == 0)
303			rc = ptable_getbestpart(od->table, &part);
304		else
305			rc = ptable_getpart(od->table, &part, slice);
306		if (rc != 0) /* Partition doesn't exist */
307			goto out;
308		dev->d_offset = part.start;
309		slice = part.index;
310		if (ptable_gettype(od->table) == PTABLE_GPT) {
311			partition = 255;
312			goto out; /* Nothing more to do */
313		}
314		/*
315		 * If d_partition < 0 and we are looking at a BSD slice,
316		 * then try to read BSD label, otherwise return the
317		 * whole MBR slice.
318		 */
319		if (partition == -1 &&
320		    part.type != PART_FREEBSD)
321			goto out;
322		/* Try to read BSD label */
323		table = ptable_open(dev, part.end - part.start + 1,
324		    od->sectorsize, ptblread);
325		if (table == NULL) {
326			DEBUG("Can't read BSD label");
327			rc = ENXIO;
328			goto out;
329		}
330		/*
331		 * If slice contains BSD label and d_partition < 0, then
332		 * assume the 'a' partition. Otherwise just return the
333		 * whole MBR slice, because it can contain ZFS.
334		 */
335		if (partition < 0) {
336			if (ptable_gettype(table) != PTABLE_BSD)
337				goto out;
338			partition = 0;
339		}
340		rc = ptable_getpart(table, &part, partition);
341		if (rc != 0)
342			goto out;
343		dev->d_offset += part.start;
344	}
345out:
346	if (table != NULL)
347		ptable_close(table);
348
349	if (rc != 0) {
350		if (od->rcnt < 1) {
351			if (od->table != NULL)
352				ptable_close(od->table);
353			free(od);
354		}
355		DEBUG("%s could not open", disk_fmtdev(dev));
356	} else {
357		if ((flags & DISK_F_NOCACHE) == 0)
358			disk_insert(dev);
359		/* Save the slice and partition number to the dev */
360		dev->d_slice = slice;
361		dev->d_partition = partition;
362		DEBUG("%s offset %lld => %p", disk_fmtdev(dev),
363		    dev->d_offset, od);
364	}
365	return (rc);
366}
367
368int
369disk_close(struct disk_devdesc *dev)
370{
371	struct open_disk *od;
372
373	od = (struct open_disk *)dev->d_opendata;
374	DEBUG("%s closed => %p [%d]", disk_fmtdev(dev), od, od->rcnt);
375	if (od->flags & DISK_F_NOCACHE) {
376		ptable_close(od->table);
377		free(od);
378	}
379	return (0);
380}
381
382void
383disk_cleanup(const struct devsw *d_dev)
384{
385#ifdef DISK_DEBUG
386	struct disk_devdesc dev;
387#endif
388	struct dentry *entry, *tmp;
389
390	STAILQ_FOREACH_SAFE(entry, &opened_disks, entry, tmp) {
391		if (entry->d_dev != d_dev)
392			continue;
393		entry->od->rcnt--;
394#ifdef DISK_DEBUG
395		dev.d_dev = (struct devsw *)entry->d_dev;
396		dev.d_unit = entry->d_unit;
397		dev.d_slice = entry->d_slice;
398		dev.d_partition = entry->d_partition;
399		DEBUG("%s was freed => %p [%d]", disk_fmtdev(&dev),
400		    entry->od, entry->od->rcnt);
401#endif
402		STAILQ_REMOVE(&opened_disks, entry, dentry, entry);
403		if (entry->od->rcnt < 1) {
404			if (entry->od->table != NULL)
405				ptable_close(entry->od->table);
406			free(entry->od);
407		}
408		free(entry);
409	}
410}
411
412char*
413disk_fmtdev(struct disk_devdesc *dev)
414{
415	static char buf[128];
416	char *cp;
417
418	cp = buf + sprintf(buf, "%s%d", dev->d_dev->dv_name, dev->d_unit);
419	if (dev->d_slice >= 0) {
420#ifdef LOADER_GPT_SUPPORT
421		if (dev->d_partition == 255) {
422			sprintf(cp, "p%d:", dev->d_slice);
423			return (buf);
424		} else
425#endif
426#ifdef LOADER_MBR_SUPPORT
427			cp += sprintf(cp, "s%d", dev->d_slice);
428#endif
429	}
430	if (dev->d_partition >= 0)
431		cp += sprintf(cp, "%c", dev->d_partition + 'a');
432	strcat(cp, ":");
433	return (buf);
434}
435
436int
437disk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path)
438{
439	int unit, slice, partition;
440	const char *np;
441	char *cp;
442
443	np = devspec;
444	unit = slice = partition = -1;
445	if (*np != '\0' && *np != ':') {
446		unit = strtol(np, &cp, 10);
447		if (cp == np)
448			return (EUNIT);
449#ifdef LOADER_GPT_SUPPORT
450		if (*cp == 'p') {
451			np = cp + 1;
452			slice = strtol(np, &cp, 10);
453			if (np == cp)
454				return (ESLICE);
455			/* we don't support nested partitions on GPT */
456			if (*cp != '\0' && *cp != ':')
457				return (EINVAL);
458			partition = 255;
459		} else
460#endif
461#ifdef LOADER_MBR_SUPPORT
462		if (*cp == 's') {
463			np = cp + 1;
464			slice = strtol(np, &cp, 10);
465			if (np == cp)
466				return (ESLICE);
467		}
468#endif
469		if (*cp != '\0' && *cp != ':') {
470			partition = *cp - 'a';
471			if (partition < 0)
472				return (EPART);
473			cp++;
474		}
475	} else
476		return (EINVAL);
477
478	if (*cp != '\0' && *cp != ':')
479		return (EINVAL);
480	dev->d_unit = unit;
481	dev->d_slice = slice;
482	dev->d_partition = partition;
483	if (path != NULL)
484		*path = (*cp == '\0') ? cp: cp + 1;
485	return (0);
486}
487