1223695Sdfr/*-
2223695Sdfr * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3239058Sae * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4223695Sdfr * All rights reserved.
5223695Sdfr *
6223695Sdfr * Redistribution and use in source and binary forms, with or without
7223695Sdfr * modification, are permitted provided that the following conditions
8223695Sdfr * are met:
9223695Sdfr * 1. Redistributions of source code must retain the above copyright
10223695Sdfr *    notice, this list of conditions and the following disclaimer.
11223695Sdfr * 2. Redistributions in binary form must reproduce the above copyright
12223695Sdfr *    notice, this list of conditions and the following disclaimer in the
13223695Sdfr *    documentation and/or other materials provided with the distribution.
14223695Sdfr *
15223695Sdfr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16223695Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17223695Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18223695Sdfr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19223695Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20223695Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21223695Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22223695Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23223695Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24223695Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25223695Sdfr * SUCH DAMAGE.
26223695Sdfr */
27223695Sdfr
28223695Sdfr#include <sys/cdefs.h>
29223695Sdfr__FBSDID("$FreeBSD: releng/10.3/sys/boot/common/disk.c 241876 2012-10-22 11:01:43Z ae $");
30223695Sdfr
31239058Sae#include <sys/disk.h>
32241053Sae#include <sys/queue.h>
33223695Sdfr#include <stand.h>
34223695Sdfr#include <stdarg.h>
35223695Sdfr#include <bootstrap.h>
36239058Sae#include <part.h>
37223695Sdfr
38223695Sdfr#include "disk.h"
39223695Sdfr
/*
 * DEBUG() prints a message prefixed with the calling function's name when
 * DISK_DEBUG is defined, and expands to nothing otherwise.
 */
#ifndef DISK_DEBUG
# define DEBUG(fmt, ...)
#else
# define DEBUG(fmt, ...)	printf("%s: " fmt "\n" , __func__ , ## __VA_ARGS__)
#endif
45223695Sdfr
46239058Saestruct open_disk {
47239058Sae	struct ptable		*table;
48239058Sae	off_t			mediasize;
49239058Sae	u_int			sectorsize;
50241809Sae	u_int			flags;
51241053Sae	int			rcnt;
52239058Sae};
53223695Sdfr
/*
 * Argument bundle handed to ptable_print() through ptable_iterate().
 */
struct print_args {
	struct disk_devdesc	*dev;		/* disk being listed */
	const char		*prefix;	/* text printed before each name */
	int			verbose;	/* non-zero: also print sizes */
};
59223695Sdfr
/*
 * One cached open: the (device, unit, slice, partition) tuple that was
 * requested, the shared open_disk it resolved to and the resulting offset.
 */
struct dentry {
	/* Lookup key, compared field by field in disk_lookup(). */
	const struct devsw	*d_dev;
	int			d_unit;
	int			d_slice;
	int			d_partition;

	struct open_disk	*od;		/* shared per-disk state */
	off_t			d_offset;	/* offset saved at insert time */
	STAILQ_ENTRY(dentry)	entry;		/* linkage in opened_disks */
#ifdef DISK_DEBUG
	uint32_t		count;		/* 1 at insert, +1 per cache hit */
#endif
};
73241053Sae
74241053Saestatic STAILQ_HEAD(, dentry) opened_disks =
75241053Sae    STAILQ_HEAD_INITIALIZER(opened_disks);
76241053Sae
77241053Saestatic int
78241053Saedisk_lookup(struct disk_devdesc *dev)
79241053Sae{
80241053Sae	struct dentry *entry;
81241053Sae	int rc;
82241053Sae
83241053Sae	rc = ENOENT;
84241053Sae	STAILQ_FOREACH(entry, &opened_disks, entry) {
85241053Sae		if (entry->d_dev != dev->d_dev ||
86241053Sae		    entry->d_unit != dev->d_unit)
87241053Sae			continue;
88241053Sae		dev->d_opendata = entry->od;
89241053Sae		if (entry->d_slice == dev->d_slice &&
90241053Sae		    entry->d_partition == dev->d_partition) {
91241053Sae			dev->d_offset = entry->d_offset;
92241053Sae			DEBUG("%s offset %lld", disk_fmtdev(dev),
93241053Sae			    dev->d_offset);
94241053Sae#ifdef DISK_DEBUG
95241053Sae			entry->count++;
96241053Sae#endif
97241053Sae			return (0);
98241053Sae		}
99241053Sae		rc = EAGAIN;
100241053Sae	}
101241053Sae	return (rc);
102241053Sae}
103241053Sae
104241053Saestatic void
105241053Saedisk_insert(struct disk_devdesc *dev)
106241053Sae{
107241053Sae	struct dentry *entry;
108241053Sae
109241053Sae	entry = (struct dentry *)malloc(sizeof(struct dentry));
110241053Sae	if (entry == NULL) {
111241053Sae		DEBUG("no memory");
112241053Sae		return;
113241053Sae	}
114241053Sae	entry->d_dev = dev->d_dev;
115241053Sae	entry->d_unit = dev->d_unit;
116241053Sae	entry->d_slice = dev->d_slice;
117241053Sae	entry->d_partition = dev->d_partition;
118241053Sae	entry->od = (struct open_disk *)dev->d_opendata;
119241053Sae	entry->od->rcnt++;
120241053Sae	entry->d_offset = dev->d_offset;
121241053Sae#ifdef DISK_DEBUG
122241053Sae	entry->count = 1;
123241053Sae#endif
124241053Sae	STAILQ_INSERT_TAIL(&opened_disks, entry, entry);
125241053Sae	DEBUG("%s cached", disk_fmtdev(dev));
126241053Sae}
127241053Sae
#ifdef DISK_DEBUG
COMMAND_SET(dcachestat, "dcachestat", "get disk cache stats",
    command_dcachestat);

/*
 * "dcachestat" command: print one line per cache entry showing the device
 * name, its hit counter, the open_disk pointer and that disk's reference
 * count.  Always returns CMD_OK.
 */
static int
command_dcachestat(int argc, char *argv[])
{
	struct disk_devdesc tmp;
	struct dentry *de;

	for (de = STAILQ_FIRST(&opened_disks); de != NULL;
	    de = STAILQ_NEXT(de, entry)) {
		/* Only the fields disk_fmtdev() reads are filled in. */
		tmp.d_dev = (struct devsw *)de->d_dev;
		tmp.d_unit = de->d_unit;
		tmp.d_slice = de->d_slice;
		tmp.d_partition = de->d_partition;
		printf("%s %d => %p [%d]\n", disk_fmtdev(&tmp), de->count,
		    de->od, de->od->rcnt);
	}
	return (CMD_OK);
}
#endif /* DISK_DEBUG */
149241053Sae
/*
 * Format `size' sectors of `sectorsize' bytes as a short human-readable
 * string ("<n>KB", "<n>MB", "<n>GB" or "<n>TB").  The result lives in a
 * static buffer that is overwritten by the next call.
 */
static char *
display_size(uint64_t size, u_int sectorsize)
{
	static char buf[80];
	uint64_t kbytes, value;
	char unit;

	kbytes = size * sectorsize / 1024;
	if (kbytes >= 10485760000LL) {
		value = kbytes / 1073741824;
		unit = 'T';
	} else if (kbytes >= 10240000) {
		value = kbytes / 1048576;
		unit = 'G';
	} else if (kbytes >= 10000) {
		value = kbytes / 1024;
		unit = 'M';
	} else {
		value = kbytes;
		unit = 'K';
	}
	sprintf(buf, "%ld%cB", (long)value, unit);
	return (buf);
}
172223695Sdfr
173223695Sdfrstatic int
174239058Saeptblread(void *d, void *buf, size_t blocks, off_t offset)
175223695Sdfr{
176239058Sae	struct disk_devdesc *dev;
177239058Sae	struct open_disk *od;
178223695Sdfr
179239058Sae	dev = (struct disk_devdesc *)d;
180239058Sae	od = (struct open_disk *)dev->d_opendata;
181239058Sae	return (dev->d_dev->dv_strategy(dev, F_READ, offset,
182239058Sae	    blocks * od->sectorsize, (char *)buf, NULL));
183223695Sdfr}
184223695Sdfr
185239058Sae#define	PWIDTH	35
186223695Sdfrstatic void
187239058Saeptable_print(void *arg, const char *pname, const struct ptable_entry *part)
188223695Sdfr{
189239058Sae	struct print_args *pa, bsd;
190239058Sae	struct open_disk *od;
191239058Sae	struct ptable *table;
192239058Sae	char line[80];
193223695Sdfr
194239058Sae	pa = (struct print_args *)arg;
195239058Sae	od = (struct open_disk *)pa->dev->d_opendata;
196239058Sae	sprintf(line, "  %s%s: %s", pa->prefix, pname,
197239058Sae	    parttype2str(part->type));
198239058Sae	if (pa->verbose)
199239058Sae		sprintf(line, "%-*s%s", PWIDTH, line,
200239058Sae		    display_size(part->end - part->start + 1,
201239058Sae		    od->sectorsize));
202239058Sae	strcat(line, "\n");
203239058Sae	pager_output(line);
204239058Sae	if (part->type == PART_FREEBSD) {
205239058Sae		/* Open slice with BSD label */
206239058Sae		pa->dev->d_offset = part->start;
207239058Sae		table = ptable_open(pa->dev, part->end - part->start + 1,
208239058Sae		    od->sectorsize, ptblread);
209239058Sae		if (table == NULL)
210239058Sae			return;
211239058Sae		sprintf(line, "  %s%s", pa->prefix, pname);
212239058Sae		bsd.dev = pa->dev;
213239058Sae		bsd.prefix = line;
214239058Sae		bsd.verbose = pa->verbose;
215239058Sae		ptable_iterate(table, &bsd, ptable_print);
216239058Sae		ptable_close(table);
217223695Sdfr	}
218223695Sdfr}
219239058Sae#undef PWIDTH
220223695Sdfr
221239058Saevoid
222239058Saedisk_print(struct disk_devdesc *dev, char *prefix, int verbose)
223223695Sdfr{
224239058Sae	struct open_disk *od;
225239058Sae	struct print_args pa;
226223695Sdfr
227239058Sae	/* Disk should be opened */
228239058Sae	od = (struct open_disk *)dev->d_opendata;
229239058Sae	pa.dev = dev;
230239058Sae	pa.prefix = prefix;
231239058Sae	pa.verbose = verbose;
232239058Sae	ptable_iterate(od->table, &pa, ptable_print);
233223695Sdfr}
234223695Sdfr
235239058Saeint
236241809Saedisk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize,
237241809Sae    u_int flags)
238223695Sdfr{
239239058Sae	struct open_disk *od;
240239058Sae	struct ptable *table;
241239058Sae	struct ptable_entry part;
242241053Sae	int rc, slice, partition;
243223695Sdfr
244241809Sae	rc = 0;
245241809Sae	if ((flags & DISK_F_NOCACHE) == 0) {
246241809Sae		rc = disk_lookup(dev);
247241809Sae		if (rc == 0)
248241809Sae			return (0);
249241809Sae	}
250223695Sdfr	/*
251239058Sae	 * While we are reading disk metadata, make sure we do it relative
252239058Sae	 * to the start of the disk
253223695Sdfr	 */
254241053Sae	dev->d_offset = 0;
255239058Sae	table = NULL;
256241053Sae	slice = dev->d_slice;
257241053Sae	partition = dev->d_partition;
258241053Sae	if (rc == EAGAIN) {
259241053Sae		/*
260241053Sae		 * This entire disk was already opened and there is no
261241053Sae		 * need to allocate new open_disk structure and open the
262241053Sae		 * main partition table.
263241053Sae		 */
264241053Sae		od = (struct open_disk *)dev->d_opendata;
265241053Sae		DEBUG("%s unit %d, slice %d, partition %d => %p (cached)",
266241053Sae		    disk_fmtdev(dev), dev->d_unit, dev->d_slice,
267241053Sae		    dev->d_partition, od);
268241053Sae		goto opened;
269241053Sae	} else {
270241053Sae		od = (struct open_disk *)malloc(sizeof(struct open_disk));
271241053Sae		if (od == NULL) {
272241053Sae			DEBUG("no memory");
273241053Sae			return (ENOMEM);
274241053Sae		}
275241809Sae		dev->d_opendata = od;
276241809Sae		od->rcnt = 0;
277241053Sae	}
278239058Sae	od->mediasize = mediasize;
279239058Sae	od->sectorsize = sectorsize;
280241809Sae	od->flags = flags;
281241053Sae	DEBUG("%s unit %d, slice %d, partition %d => %p",
282241053Sae	    disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition, od);
283223695Sdfr
284239058Sae	/* Determine disk layout. */
285239058Sae	od->table = ptable_open(dev, mediasize / sectorsize, sectorsize,
286239058Sae	    ptblread);
287239058Sae	if (od->table == NULL) {
288239058Sae		DEBUG("Can't read partition table");
289239058Sae		rc = ENXIO;
290223695Sdfr		goto out;
291223695Sdfr	}
292241053Saeopened:
293241053Sae	rc = 0;
294239230Sae	if (ptable_gettype(od->table) == PTABLE_BSD &&
295241053Sae	    partition >= 0) {
296239230Sae		/* It doesn't matter what value has d_slice */
297241053Sae		rc = ptable_getpart(od->table, &part, partition);
298239230Sae		if (rc == 0)
299239230Sae			dev->d_offset = part.start;
300241053Sae	} else if (slice >= 0) {
301239058Sae		/* Try to get information about partition */
302241053Sae		if (slice == 0)
303241023Sae			rc = ptable_getbestpart(od->table, &part);
304241023Sae		else
305241053Sae			rc = ptable_getpart(od->table, &part, slice);
306239058Sae		if (rc != 0) /* Partition doesn't exist */
307223695Sdfr			goto out;
308239058Sae		dev->d_offset = part.start;
309241053Sae		slice = part.index;
310241053Sae		if (ptable_gettype(od->table) == PTABLE_GPT) {
311241053Sae			partition = 255;
312241053Sae			goto out; /* Nothing more to do */
313241876Sae		} else if (partition == 255) {
314241876Sae			/*
315241876Sae			 * When we try to open GPT partition, but partition
316241876Sae			 * table isn't GPT, reset d_partition value to -1
317241876Sae			 * and try to autodetect appropriate value.
318241876Sae			 */
319241876Sae			partition = -1;
320241023Sae		}
321239293Sae		/*
322239293Sae		 * If d_partition < 0 and we are looking at a BSD slice,
323239293Sae		 * then try to read BSD label, otherwise return the
324239293Sae		 * whole MBR slice.
325239293Sae		 */
326241053Sae		if (partition == -1 &&
327239293Sae		    part.type != PART_FREEBSD)
328239293Sae			goto out;
329239058Sae		/* Try to read BSD label */
330239058Sae		table = ptable_open(dev, part.end - part.start + 1,
331239058Sae		    od->sectorsize, ptblread);
332239058Sae		if (table == NULL) {
333239058Sae			DEBUG("Can't read BSD label");
334239058Sae			rc = ENXIO;
335223695Sdfr			goto out;
336223695Sdfr		}
337239293Sae		/*
338239293Sae		 * If slice contains BSD label and d_partition < 0, then
339239293Sae		 * assume the 'a' partition. Otherwise just return the
340239293Sae		 * whole MBR slice, because it can contain ZFS.
341239293Sae		 */
342241053Sae		if (partition < 0) {
343239293Sae			if (ptable_gettype(table) != PTABLE_BSD)
344239293Sae				goto out;
345241053Sae			partition = 0;
346239293Sae		}
347241053Sae		rc = ptable_getpart(table, &part, partition);
348239058Sae		if (rc != 0)
349239058Sae			goto out;
350239058Sae		dev->d_offset += part.start;
351223695Sdfr	}
352223695Sdfrout:
353239058Sae	if (table != NULL)
354239058Sae		ptable_close(table);
355239210Sae
356239058Sae	if (rc != 0) {
357241053Sae		if (od->rcnt < 1) {
358241053Sae			if (od->table != NULL)
359241053Sae				ptable_close(od->table);
360241053Sae			free(od);
361241053Sae		}
362239231Sae		DEBUG("%s could not open", disk_fmtdev(dev));
363239210Sae	} else {
364241809Sae		if ((flags & DISK_F_NOCACHE) == 0)
365241809Sae			disk_insert(dev);
366241053Sae		/* Save the slice and partition number to the dev */
367241053Sae		dev->d_slice = slice;
368241053Sae		dev->d_partition = partition;
369241053Sae		DEBUG("%s offset %lld => %p", disk_fmtdev(dev),
370241053Sae		    dev->d_offset, od);
371239058Sae	}
372223695Sdfr	return (rc);
373223695Sdfr}
374223695Sdfr
375239058Saeint
376239058Saedisk_close(struct disk_devdesc *dev)
377223695Sdfr{
378239058Sae	struct open_disk *od;
379223695Sdfr
380239058Sae	od = (struct open_disk *)dev->d_opendata;
381241053Sae	DEBUG("%s closed => %p [%d]", disk_fmtdev(dev), od, od->rcnt);
382241809Sae	if (od->flags & DISK_F_NOCACHE) {
383241809Sae		ptable_close(od->table);
384241809Sae		free(od);
385241809Sae	}
386223695Sdfr	return (0);
387223695Sdfr}
388223695Sdfr
389241053Saevoid
390241053Saedisk_cleanup(const struct devsw *d_dev)
391241053Sae{
392241065Sae#ifdef DISK_DEBUG
393241053Sae	struct disk_devdesc dev;
394241065Sae#endif
395241053Sae	struct dentry *entry, *tmp;
396241053Sae
397241053Sae	STAILQ_FOREACH_SAFE(entry, &opened_disks, entry, tmp) {
398241053Sae		if (entry->d_dev != d_dev)
399241053Sae			continue;
400241053Sae		entry->od->rcnt--;
401241053Sae#ifdef DISK_DEBUG
402241053Sae		dev.d_dev = (struct devsw *)entry->d_dev;
403241053Sae		dev.d_unit = entry->d_unit;
404241053Sae		dev.d_slice = entry->d_slice;
405241053Sae		dev.d_partition = entry->d_partition;
406241053Sae		DEBUG("%s was freed => %p [%d]", disk_fmtdev(&dev),
407241053Sae		    entry->od, entry->od->rcnt);
408241053Sae#endif
409241065Sae		STAILQ_REMOVE(&opened_disks, entry, dentry, entry);
410241053Sae		if (entry->od->rcnt < 1) {
411241053Sae			if (entry->od->table != NULL)
412241053Sae				ptable_close(entry->od->table);
413241053Sae			free(entry->od);
414241053Sae		}
415241053Sae		free(entry);
416241053Sae	}
417241053Sae}
418241053Sae
419239058Saechar*
420239058Saedisk_fmtdev(struct disk_devdesc *dev)
421223695Sdfr{
422239058Sae	static char buf[128];
423239058Sae	char *cp;
424223695Sdfr
425239058Sae	cp = buf + sprintf(buf, "%s%d", dev->d_dev->dv_name, dev->d_unit);
426241053Sae	if (dev->d_slice >= 0) {
427223695Sdfr#ifdef LOADER_GPT_SUPPORT
428239058Sae		if (dev->d_partition == 255) {
429239058Sae			sprintf(cp, "p%d:", dev->d_slice);
430239058Sae			return (buf);
431239058Sae		} else
432223695Sdfr#endif
433223712Smarius#ifdef LOADER_MBR_SUPPORT
434239058Sae			cp += sprintf(cp, "s%d", dev->d_slice);
435223712Smarius#endif
436239058Sae	}
437239230Sae	if (dev->d_partition >= 0)
438239230Sae		cp += sprintf(cp, "%c", dev->d_partition + 'a');
439239058Sae	strcat(cp, ":");
440239058Sae	return (buf);
441223695Sdfr}
442223695Sdfr
443239058Saeint
444239058Saedisk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path)
445223695Sdfr{
446239058Sae	int unit, slice, partition;
447239058Sae	const char *np;
448239058Sae	char *cp;
449223695Sdfr
450239058Sae	np = devspec;
451239058Sae	unit = slice = partition = -1;
452239058Sae	if (*np != '\0' && *np != ':') {
453239058Sae		unit = strtol(np, &cp, 10);
454239058Sae		if (cp == np)
455239058Sae			return (EUNIT);
456223695Sdfr#ifdef LOADER_GPT_SUPPORT
457239058Sae		if (*cp == 'p') {
458239058Sae			np = cp + 1;
459239058Sae			slice = strtol(np, &cp, 10);
460239058Sae			if (np == cp)
461239058Sae				return (ESLICE);
462239058Sae			/* we don't support nested partitions on GPT */
463239058Sae			if (*cp != '\0' && *cp != ':')
464239058Sae				return (EINVAL);
465239058Sae			partition = 255;
466239058Sae		} else
467223695Sdfr#endif
468223712Smarius#ifdef LOADER_MBR_SUPPORT
469239058Sae		if (*cp == 's') {
470239058Sae			np = cp + 1;
471239058Sae			slice = strtol(np, &cp, 10);
472239058Sae			if (np == cp)
473239058Sae				return (ESLICE);
474239058Sae		}
475223712Smarius#endif
476239058Sae		if (*cp != '\0' && *cp != ':') {
477239058Sae			partition = *cp - 'a';
478239058Sae			if (partition < 0)
479239058Sae				return (EPART);
480239058Sae			cp++;
481239058Sae		}
482239058Sae	} else
483239058Sae		return (EINVAL);
484239058Sae
485239058Sae	if (*cp != '\0' && *cp != ':')
486239058Sae		return (EINVAL);
487239058Sae	dev->d_unit = unit;
488239058Sae	dev->d_slice = slice;
489239058Sae	dev->d_partition = partition;
490239058Sae	if (path != NULL)
491239058Sae		*path = (*cp == '\0') ? cp: cp + 1;
492239058Sae	return (0);
493223695Sdfr}
494