disk.c revision 241053
1/*-
2 * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/boot/common/disk.c 241053 2012-09-29 16:47:56Z ae $");
30
31#include <sys/disk.h>
32#include <sys/queue.h>
33#include <stand.h>
34#include <stdarg.h>
35#include <bootstrap.h>
36#include <part.h>
37
38#include "disk.h"
39
/*
 * Debug tracing helper.  When DISK_DEBUG is defined, DEBUG() prints the
 * name of the calling function followed by the formatted message and a
 * newline; otherwise it expands to nothing, so its arguments are not
 * evaluated at all.
 */
#ifdef DISK_DEBUG
# define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
#else
# define DEBUG(fmt, args...)
#endif
45
/*
 * Per-disk state shared by every cached entry that refers to the same
 * disk.  It is allocated in disk_open() and released once no cache entry
 * references it any more.
 */
struct open_disk {
	struct ptable		*table;		/* main partition table of the disk */
	off_t			mediasize;	/* media size as passed to disk_open() */
	u_int			sectorsize;	/* sector size as passed to disk_open() */
	int			rcnt;		/* number of cache entries using this */
};
52
/*
 * Argument bundle handed to ptable_print() through ptable_iterate().
 */
struct print_args {
	struct disk_devdesc	*dev;		/* disk whose tables are listed */
	const char		*prefix;	/* text printed before each name */
	int			verbose;	/* non-zero: also print sizes */
};
58
/*
 * One entry of the opened-disks cache.  An entry is keyed by the device
 * switch, unit, slice and partition that were requested from disk_open()
 * and remembers the offset that the request resolved to.
 */
struct dentry {
	const struct devsw	*d_dev;		/* device switch of the disk */
	int			d_unit;		/* unit number */
	int			d_slice;	/* requested slice */
	int			d_partition;	/* requested partition */

	struct open_disk	*od;		/* shared per-disk state */
	off_t			d_offset;	/* resolved offset for this entry */
	STAILQ_ENTRY(dentry)	entry;		/* linkage in opened_disks */
#ifdef DISK_DEBUG
	uint32_t		count;		/* how often the entry was used */
#endif
};
72
/* Cache of already opened disks; filled by disk_insert(). */
static STAILQ_HEAD(, dentry) opened_disks =
    STAILQ_HEAD_INITIALIZER(opened_disks);
75
76static int
77disk_lookup(struct disk_devdesc *dev)
78{
79	struct dentry *entry;
80	int rc;
81
82	rc = ENOENT;
83	STAILQ_FOREACH(entry, &opened_disks, entry) {
84		if (entry->d_dev != dev->d_dev ||
85		    entry->d_unit != dev->d_unit)
86			continue;
87		dev->d_opendata = entry->od;
88		if (entry->d_slice == dev->d_slice &&
89		    entry->d_partition == dev->d_partition) {
90			dev->d_offset = entry->d_offset;
91			DEBUG("%s offset %lld", disk_fmtdev(dev),
92			    dev->d_offset);
93#ifdef DISK_DEBUG
94			entry->count++;
95#endif
96			return (0);
97		}
98		rc = EAGAIN;
99	}
100	return (rc);
101}
102
103static void
104disk_insert(struct disk_devdesc *dev)
105{
106	struct dentry *entry;
107
108	entry = (struct dentry *)malloc(sizeof(struct dentry));
109	if (entry == NULL) {
110		DEBUG("no memory");
111		return;
112	}
113	entry->d_dev = dev->d_dev;
114	entry->d_unit = dev->d_unit;
115	entry->d_slice = dev->d_slice;
116	entry->d_partition = dev->d_partition;
117	entry->od = (struct open_disk *)dev->d_opendata;
118	entry->od->rcnt++;
119	entry->d_offset = dev->d_offset;
120#ifdef DISK_DEBUG
121	entry->count = 1;
122#endif
123	STAILQ_INSERT_TAIL(&opened_disks, entry, entry);
124	DEBUG("%s cached", disk_fmtdev(dev));
125}
126
#ifdef DISK_DEBUG
COMMAND_SET(dcachestat, "dcachestat", "get disk cache stats",
    command_dcachestat);

/*
 * Loader command "dcachestat": print one line per cache entry showing the
 * device name, the use counter of the entry, the address of the shared
 * open_disk and its reference count.
 */
static int
command_dcachestat(int argc, char *argv[])
{
	struct disk_devdesc desc;
	struct open_disk *disk;
	struct dentry *de;

	STAILQ_FOREACH(de, &opened_disks, entry) {
		/* Only the fields read by disk_fmtdev() are filled in. */
		desc.d_dev = (struct devsw *)de->d_dev;
		desc.d_unit = de->d_unit;
		desc.d_slice = de->d_slice;
		desc.d_partition = de->d_partition;
		disk = de->od;
		printf("%s %d => %p [%d]\n", disk_fmtdev(&desc), de->count,
		    disk, disk->rcnt);
	}
	return (CMD_OK);
}
#endif /* DISK_DEBUG */
148
/*
 * Format a size given as a number of sectors of sectorsize bytes into a
 * short human-readable string (KB, MB, GB or TB, rounded down).
 * The result lives in a static buffer that is overwritten by the next call.
 */
static char *
display_size(uint64_t size, u_int sectorsize)
{
	static char buf[80];
	uint64_t kbytes, value;
	char suffix;

	kbytes = size * sectorsize / 1024;
	if (kbytes < 10000) {
		value = kbytes;
		suffix = 'K';
	} else if (kbytes < 10240000) {
		value = kbytes / 1024;
		suffix = 'M';
	} else if (kbytes < 10485760000ULL) {
		value = kbytes / 1048576;
		suffix = 'G';
	} else {
		value = kbytes / 1073741824;
		suffix = 'T';
	}
	sprintf(buf, "%ld%cB", (long)value, suffix);
	return (buf);
}
171
172static int
173ptblread(void *d, void *buf, size_t blocks, off_t offset)
174{
175	struct disk_devdesc *dev;
176	struct open_disk *od;
177
178	dev = (struct disk_devdesc *)d;
179	od = (struct open_disk *)dev->d_opendata;
180	return (dev->d_dev->dv_strategy(dev, F_READ, offset,
181	    blocks * od->sectorsize, (char *)buf, NULL));
182}
183
184#define	PWIDTH	35
185static void
186ptable_print(void *arg, const char *pname, const struct ptable_entry *part)
187{
188	struct print_args *pa, bsd;
189	struct open_disk *od;
190	struct ptable *table;
191	char line[80];
192
193	pa = (struct print_args *)arg;
194	od = (struct open_disk *)pa->dev->d_opendata;
195	sprintf(line, "  %s%s: %s", pa->prefix, pname,
196	    parttype2str(part->type));
197	if (pa->verbose)
198		sprintf(line, "%-*s%s", PWIDTH, line,
199		    display_size(part->end - part->start + 1,
200		    od->sectorsize));
201	strcat(line, "\n");
202	pager_output(line);
203	if (part->type == PART_FREEBSD) {
204		/* Open slice with BSD label */
205		pa->dev->d_offset = part->start;
206		table = ptable_open(pa->dev, part->end - part->start + 1,
207		    od->sectorsize, ptblread);
208		if (table == NULL)
209			return;
210		sprintf(line, "  %s%s", pa->prefix, pname);
211		bsd.dev = pa->dev;
212		bsd.prefix = line;
213		bsd.verbose = pa->verbose;
214		ptable_iterate(table, &bsd, ptable_print);
215		ptable_close(table);
216	}
217}
218#undef PWIDTH
219
220void
221disk_print(struct disk_devdesc *dev, char *prefix, int verbose)
222{
223	struct open_disk *od;
224	struct print_args pa;
225
226	/* Disk should be opened */
227	od = (struct open_disk *)dev->d_opendata;
228	pa.dev = dev;
229	pa.prefix = prefix;
230	pa.verbose = verbose;
231	ptable_iterate(od->table, &pa, ptable_print);
232}
233
234int
235disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize)
236{
237	struct open_disk *od;
238	struct ptable *table;
239	struct ptable_entry part;
240	int rc, slice, partition;
241
242	rc = disk_lookup(dev);
243	if (rc == 0)
244		return (0);
245	/*
246	 * While we are reading disk metadata, make sure we do it relative
247	 * to the start of the disk
248	 */
249	dev->d_offset = 0;
250	table = NULL;
251	slice = dev->d_slice;
252	partition = dev->d_partition;
253	if (rc == EAGAIN) {
254		/*
255		 * This entire disk was already opened and there is no
256		 * need to allocate new open_disk structure and open the
257		 * main partition table.
258		 */
259		od = (struct open_disk *)dev->d_opendata;
260		DEBUG("%s unit %d, slice %d, partition %d => %p (cached)",
261		    disk_fmtdev(dev), dev->d_unit, dev->d_slice,
262		    dev->d_partition, od);
263		goto opened;
264	} else {
265		od = (struct open_disk *)malloc(sizeof(struct open_disk));
266		if (od == NULL) {
267			DEBUG("no memory");
268			return (ENOMEM);
269		}
270	}
271	dev->d_opendata = od;
272	od->mediasize = mediasize;
273	od->sectorsize = sectorsize;
274	od->rcnt = 0;
275	DEBUG("%s unit %d, slice %d, partition %d => %p",
276	    disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition, od);
277
278	/* Determine disk layout. */
279	od->table = ptable_open(dev, mediasize / sectorsize, sectorsize,
280	    ptblread);
281	if (od->table == NULL) {
282		DEBUG("Can't read partition table");
283		rc = ENXIO;
284		goto out;
285	}
286opened:
287	rc = 0;
288	if (ptable_gettype(od->table) == PTABLE_BSD &&
289	    partition >= 0) {
290		/* It doesn't matter what value has d_slice */
291		rc = ptable_getpart(od->table, &part, partition);
292		if (rc == 0)
293			dev->d_offset = part.start;
294	} else if (slice >= 0) {
295		/* Try to get information about partition */
296		if (slice == 0)
297			rc = ptable_getbestpart(od->table, &part);
298		else
299			rc = ptable_getpart(od->table, &part, slice);
300		if (rc != 0) /* Partition doesn't exist */
301			goto out;
302		dev->d_offset = part.start;
303		slice = part.index;
304		if (ptable_gettype(od->table) == PTABLE_GPT) {
305			partition = 255;
306			goto out; /* Nothing more to do */
307		}
308		/*
309		 * If d_partition < 0 and we are looking at a BSD slice,
310		 * then try to read BSD label, otherwise return the
311		 * whole MBR slice.
312		 */
313		if (partition == -1 &&
314		    part.type != PART_FREEBSD)
315			goto out;
316		/* Try to read BSD label */
317		table = ptable_open(dev, part.end - part.start + 1,
318		    od->sectorsize, ptblread);
319		if (table == NULL) {
320			DEBUG("Can't read BSD label");
321			rc = ENXIO;
322			goto out;
323		}
324		/*
325		 * If slice contains BSD label and d_partition < 0, then
326		 * assume the 'a' partition. Otherwise just return the
327		 * whole MBR slice, because it can contain ZFS.
328		 */
329		if (partition < 0) {
330			if (ptable_gettype(table) != PTABLE_BSD)
331				goto out;
332			partition = 0;
333		}
334		rc = ptable_getpart(table, &part, partition);
335		if (rc != 0)
336			goto out;
337		dev->d_offset += part.start;
338	}
339out:
340	if (table != NULL)
341		ptable_close(table);
342
343	if (rc != 0) {
344		if (od->rcnt < 1) {
345			if (od->table != NULL)
346				ptable_close(od->table);
347			free(od);
348		}
349		DEBUG("%s could not open", disk_fmtdev(dev));
350	} else {
351		disk_insert(dev);
352		/* Save the slice and partition number to the dev */
353		dev->d_slice = slice;
354		dev->d_partition = partition;
355		DEBUG("%s offset %lld => %p", disk_fmtdev(dev),
356		    dev->d_offset, od);
357	}
358	return (rc);
359}
360
/*
 * Close a device opened with disk_open().  Opened disks stay in the cache
 * until disk_cleanup() is called, so nothing is released here; with
 * DISK_DEBUG defined the call is merely traced.  Always returns 0.
 */
int
disk_close(struct disk_devdesc *dev)
{
	/*
	 * Test with #ifdef like the rest of the file: DEBUG() is enabled
	 * whenever DISK_DEBUG is defined, whatever its value.
	 */
#ifdef DISK_DEBUG
	struct open_disk *od;

	od = (struct open_disk *)dev->d_opendata;
	DEBUG("%s closed => %p [%d]", disk_fmtdev(dev), od, od->rcnt);
#endif
	return (0);
}
372
373void
374disk_cleanup(const struct devsw *d_dev)
375{
376	struct disk_devdesc dev;
377	struct dentry *entry, *tmp;
378
379	STAILQ_FOREACH_SAFE(entry, &opened_disks, entry, tmp) {
380		if (entry->d_dev != d_dev)
381			continue;
382		entry->od->rcnt--;
383#ifdef DISK_DEBUG
384		dev.d_dev = (struct devsw *)entry->d_dev;
385		dev.d_unit = entry->d_unit;
386		dev.d_slice = entry->d_slice;
387		dev.d_partition = entry->d_partition;
388		STAILQ_REMOVE(&opened_disks, entry, dentry, entry);
389		DEBUG("%s was freed => %p [%d]", disk_fmtdev(&dev),
390		    entry->od, entry->od->rcnt);
391#endif
392		if (entry->od->rcnt < 1) {
393			if (entry->od->table != NULL)
394				ptable_close(entry->od->table);
395			free(entry->od);
396		}
397		free(entry);
398	}
399}
400
401char*
402disk_fmtdev(struct disk_devdesc *dev)
403{
404	static char buf[128];
405	char *cp;
406
407	cp = buf + sprintf(buf, "%s%d", dev->d_dev->dv_name, dev->d_unit);
408	if (dev->d_slice >= 0) {
409#ifdef LOADER_GPT_SUPPORT
410		if (dev->d_partition == 255) {
411			sprintf(cp, "p%d:", dev->d_slice);
412			return (buf);
413		} else
414#endif
415#ifdef LOADER_MBR_SUPPORT
416			cp += sprintf(cp, "s%d", dev->d_slice);
417#endif
418	}
419	if (dev->d_partition >= 0)
420		cp += sprintf(cp, "%c", dev->d_partition + 'a');
421	strcat(cp, ":");
422	return (buf);
423}
424
425int
426disk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path)
427{
428	int unit, slice, partition;
429	const char *np;
430	char *cp;
431
432	np = devspec;
433	unit = slice = partition = -1;
434	if (*np != '\0' && *np != ':') {
435		unit = strtol(np, &cp, 10);
436		if (cp == np)
437			return (EUNIT);
438#ifdef LOADER_GPT_SUPPORT
439		if (*cp == 'p') {
440			np = cp + 1;
441			slice = strtol(np, &cp, 10);
442			if (np == cp)
443				return (ESLICE);
444			/* we don't support nested partitions on GPT */
445			if (*cp != '\0' && *cp != ':')
446				return (EINVAL);
447			partition = 255;
448		} else
449#endif
450#ifdef LOADER_MBR_SUPPORT
451		if (*cp == 's') {
452			np = cp + 1;
453			slice = strtol(np, &cp, 10);
454			if (np == cp)
455				return (ESLICE);
456		}
457#endif
458		if (*cp != '\0' && *cp != ':') {
459			partition = *cp - 'a';
460			if (partition < 0)
461				return (EPART);
462			cp++;
463		}
464	} else
465		return (EINVAL);
466
467	if (*cp != '\0' && *cp != ':')
468		return (EINVAL);
469	dev->d_unit = unit;
470	dev->d_slice = slice;
471	dev->d_partition = partition;
472	if (path != NULL)
473		*path = (*cp == '\0') ? cp: cp + 1;
474	return (0);
475}
476