1/*-
2 * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/disk.h>
32#include <sys/queue.h>
33#include <stand.h>
34#include <stdarg.h>
35#include <bootstrap.h>
36#include <part.h>
37
38#include "disk.h"
39
/*
 * Debug tracing.  When DISK_DEBUG is defined, DEBUG() prints the name of
 * the calling function, the formatted message and a trailing newline.
 * Otherwise it expands to nothing, so its arguments are not evaluated.
 */
#ifdef DISK_DEBUG
# define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
#else
# define DEBUG(fmt, args...)
#endif
45
/*
 * Per-disk state created by disk_open() and stored in d_opendata.
 * One instance may be shared by several cache entries of the same disk.
 */
struct open_disk {
	struct ptable		*table;		/* table opened over the whole disk */
	off_t			mediasize;	/* mediasize passed to disk_open() */
	u_int			sectorsize;	/* sectorsize passed to disk_open() */
	u_int			flags;		/* flags passed to disk_open() */
	int			rcnt;		/* number of cache entries using us */
};
53
/* Argument bundle handed to ptable_print() through ptable_iterate(). */
struct print_args {
	struct disk_devdesc	*dev;		/* disk being listed */
	const char		*prefix;	/* text printed before each name */
	int			verbose;	/* non-zero: also print sizes */
};
59
/*
 * Entry in the cache of opened disks.  The (d_dev, d_unit, d_slice,
 * d_partition) tuple is the lookup key used by disk_lookup().
 */
struct dentry {
	const struct devsw	*d_dev;
	int			d_unit;
	int			d_slice;
	int			d_partition;

	struct open_disk	*od;		/* shared per-disk state */
	off_t			d_offset;	/* offset saved by disk_insert() */
	STAILQ_ENTRY(dentry)	entry;		/* linkage in opened_disks */
#ifdef DISK_DEBUG
	uint32_t		count;		/* 1 on insert, +1 per cache hit */
#endif
};
73
/* Cache of opened disks; filled by disk_insert(), emptied by disk_cleanup(). */
static STAILQ_HEAD(, dentry) opened_disks =
    STAILQ_HEAD_INITIALIZER(opened_disks);
76
77static int
78disk_lookup(struct disk_devdesc *dev)
79{
80	struct dentry *entry;
81	int rc;
82
83	rc = ENOENT;
84	STAILQ_FOREACH(entry, &opened_disks, entry) {
85		if (entry->d_dev != dev->d_dev ||
86		    entry->d_unit != dev->d_unit)
87			continue;
88		dev->d_opendata = entry->od;
89		if (entry->d_slice == dev->d_slice &&
90		    entry->d_partition == dev->d_partition) {
91			dev->d_offset = entry->d_offset;
92			DEBUG("%s offset %lld", disk_fmtdev(dev),
93			    dev->d_offset);
94#ifdef DISK_DEBUG
95			entry->count++;
96#endif
97			return (0);
98		}
99		rc = EAGAIN;
100	}
101	return (rc);
102}
103
104static void
105disk_insert(struct disk_devdesc *dev)
106{
107	struct dentry *entry;
108
109	entry = (struct dentry *)malloc(sizeof(struct dentry));
110	if (entry == NULL) {
111		DEBUG("no memory");
112		return;
113	}
114	entry->d_dev = dev->d_dev;
115	entry->d_unit = dev->d_unit;
116	entry->d_slice = dev->d_slice;
117	entry->d_partition = dev->d_partition;
118	entry->od = (struct open_disk *)dev->d_opendata;
119	entry->od->rcnt++;
120	entry->d_offset = dev->d_offset;
121#ifdef DISK_DEBUG
122	entry->count = 1;
123#endif
124	STAILQ_INSERT_TAIL(&opened_disks, entry, entry);
125	DEBUG("%s cached", disk_fmtdev(dev));
126}
127
#ifdef DISK_DEBUG
COMMAND_SET(dcachestat, "dcachestat", "get disk cache stats",
    command_dcachestat);

/*
 * "dcachestat" command: print one line per cache entry with the device
 * name, the entry's use counter, the address of its open_disk and that
 * open_disk's reference count.  Always returns CMD_OK.
 */
static int
command_dcachestat(int argc, char *argv[])
{
	struct disk_devdesc key;
	struct dentry *de;

	STAILQ_FOREACH(de, &opened_disks, entry) {
		/* Only the fields that disk_fmtdev() reads are filled in. */
		key.d_dev = (struct devsw *)de->d_dev;
		key.d_unit = de->d_unit;
		key.d_slice = de->d_slice;
		key.d_partition = de->d_partition;
		printf("%s %d => %p [%d]\n", disk_fmtdev(&key), de->count,
		    de->od, de->od->rcnt);
	}
	return (CMD_OK);
}
#endif /* DISK_DEBUG */
149
/*
 * Convert a size to a human-readable string.
 *
 * size is a count of sectors and sectorsize the sector size in bytes.
 * The value is scaled to kilobytes and then shown in the largest of
 * KB/MB/GB/TB for which it still needs at least five digits in the next
 * smaller unit (e.g. 9999KB stays in KB, 10000KB becomes 9MB).
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.
 */
static char *
display_size(uint64_t size, u_int sectorsize)
{
	static char buf[80];
	char unit;

	/*
	 * Scale sectors to kilobytes.  Computing size * sectorsize first can
	 * wrap around 64 bits for very large (or garbage) sector counts, so
	 * split the value into whole multiples of 1024 and a remainder.  The
	 * result is identical to size * sectorsize / 1024 whenever that
	 * product fits.
	 */
	size = (size / 1024) * sectorsize + (size % 1024) * sectorsize / 1024;
	unit = 'K';
	if (size >= 10485760000ULL) {		/* >= 10000 GB */
		size /= 1073741824;
		unit = 'T';
	} else if (size >= 10240000) {		/* >= 10000 MB */
		size /= 1048576;
		unit = 'G';
	} else if (size >= 10000) {		/* >= 10000 KB */
		size /= 1024;
		unit = 'M';
	}
	/* Print the full 64-bit value; long may be only 32 bits wide. */
	snprintf(buf, sizeof(buf), "%llu%cB", (unsigned long long)size, unit);
	return (buf);
}
172
173static int
174ptblread(void *d, void *buf, size_t blocks, off_t offset)
175{
176	struct disk_devdesc *dev;
177	struct open_disk *od;
178
179	dev = (struct disk_devdesc *)d;
180	od = (struct open_disk *)dev->d_opendata;
181	return (dev->d_dev->dv_strategy(dev, F_READ, offset,
182	    blocks * od->sectorsize, (char *)buf, NULL));
183}
184
185#define	PWIDTH	35
186static void
187ptable_print(void *arg, const char *pname, const struct ptable_entry *part)
188{
189	struct print_args *pa, bsd;
190	struct open_disk *od;
191	struct ptable *table;
192	char line[80];
193
194	pa = (struct print_args *)arg;
195	od = (struct open_disk *)pa->dev->d_opendata;
196	sprintf(line, "  %s%s: %s", pa->prefix, pname,
197	    parttype2str(part->type));
198	if (pa->verbose)
199		sprintf(line, "%-*s%s", PWIDTH, line,
200		    display_size(part->end - part->start + 1,
201		    od->sectorsize));
202	strcat(line, "\n");
203	pager_output(line);
204	if (part->type == PART_FREEBSD) {
205		/* Open slice with BSD label */
206		pa->dev->d_offset = part->start;
207		table = ptable_open(pa->dev, part->end - part->start + 1,
208		    od->sectorsize, ptblread);
209		if (table == NULL)
210			return;
211		sprintf(line, "  %s%s", pa->prefix, pname);
212		bsd.dev = pa->dev;
213		bsd.prefix = line;
214		bsd.verbose = pa->verbose;
215		ptable_iterate(table, &bsd, ptable_print);
216		ptable_close(table);
217	}
218}
219#undef PWIDTH
220
221void
222disk_print(struct disk_devdesc *dev, char *prefix, int verbose)
223{
224	struct open_disk *od;
225	struct print_args pa;
226
227	/* Disk should be opened */
228	od = (struct open_disk *)dev->d_opendata;
229	pa.dev = dev;
230	pa.prefix = prefix;
231	pa.verbose = verbose;
232	ptable_iterate(od->table, &pa, ptable_print);
233}
234
235int
236disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize,
237    u_int flags)
238{
239	struct open_disk *od;
240	struct ptable *table;
241	struct ptable_entry part;
242	int rc, slice, partition;
243
244	rc = 0;
245	if ((flags & DISK_F_NOCACHE) == 0) {
246		rc = disk_lookup(dev);
247		if (rc == 0)
248			return (0);
249	}
250	/*
251	 * While we are reading disk metadata, make sure we do it relative
252	 * to the start of the disk
253	 */
254	dev->d_offset = 0;
255	table = NULL;
256	slice = dev->d_slice;
257	partition = dev->d_partition;
258	if (rc == EAGAIN) {
259		/*
260		 * This entire disk was already opened and there is no
261		 * need to allocate new open_disk structure and open the
262		 * main partition table.
263		 */
264		od = (struct open_disk *)dev->d_opendata;
265		DEBUG("%s unit %d, slice %d, partition %d => %p (cached)",
266		    disk_fmtdev(dev), dev->d_unit, dev->d_slice,
267		    dev->d_partition, od);
268		goto opened;
269	} else {
270		od = (struct open_disk *)malloc(sizeof(struct open_disk));
271		if (od == NULL) {
272			DEBUG("no memory");
273			return (ENOMEM);
274		}
275		dev->d_opendata = od;
276		od->rcnt = 0;
277	}
278	od->mediasize = mediasize;
279	od->sectorsize = sectorsize;
280	od->flags = flags;
281	DEBUG("%s unit %d, slice %d, partition %d => %p",
282	    disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition, od);
283
284	/* Determine disk layout. */
285	od->table = ptable_open(dev, mediasize / sectorsize, sectorsize,
286	    ptblread);
287	if (od->table == NULL) {
288		DEBUG("Can't read partition table");
289		rc = ENXIO;
290		goto out;
291	}
292opened:
293	rc = 0;
294	if (ptable_gettype(od->table) == PTABLE_BSD &&
295	    partition >= 0) {
296		/* It doesn't matter what value has d_slice */
297		rc = ptable_getpart(od->table, &part, partition);
298		if (rc == 0)
299			dev->d_offset = part.start;
300	} else if (slice >= 0) {
301		/* Try to get information about partition */
302		if (slice == 0)
303			rc = ptable_getbestpart(od->table, &part);
304		else
305			rc = ptable_getpart(od->table, &part, slice);
306		if (rc != 0) /* Partition doesn't exist */
307			goto out;
308		dev->d_offset = part.start;
309		slice = part.index;
310		if (ptable_gettype(od->table) == PTABLE_GPT) {
311			partition = 255;
312			goto out; /* Nothing more to do */
313		} else if (partition == 255) {
314			/*
315			 * When we try to open GPT partition, but partition
316			 * table isn't GPT, reset d_partition value to -1
317			 * and try to autodetect appropriate value.
318			 */
319			partition = -1;
320		}
321		/*
322		 * If d_partition < 0 and we are looking at a BSD slice,
323		 * then try to read BSD label, otherwise return the
324		 * whole MBR slice.
325		 */
326		if (partition == -1 &&
327		    part.type != PART_FREEBSD)
328			goto out;
329		/* Try to read BSD label */
330		table = ptable_open(dev, part.end - part.start + 1,
331		    od->sectorsize, ptblread);
332		if (table == NULL) {
333			DEBUG("Can't read BSD label");
334			rc = ENXIO;
335			goto out;
336		}
337		/*
338		 * If slice contains BSD label and d_partition < 0, then
339		 * assume the 'a' partition. Otherwise just return the
340		 * whole MBR slice, because it can contain ZFS.
341		 */
342		if (partition < 0) {
343			if (ptable_gettype(table) != PTABLE_BSD)
344				goto out;
345			partition = 0;
346		}
347		rc = ptable_getpart(table, &part, partition);
348		if (rc != 0)
349			goto out;
350		dev->d_offset += part.start;
351	}
352out:
353	if (table != NULL)
354		ptable_close(table);
355
356	if (rc != 0) {
357		if (od->rcnt < 1) {
358			if (od->table != NULL)
359				ptable_close(od->table);
360			free(od);
361		}
362		DEBUG("%s could not open", disk_fmtdev(dev));
363	} else {
364		if ((flags & DISK_F_NOCACHE) == 0)
365			disk_insert(dev);
366		/* Save the slice and partition number to the dev */
367		dev->d_slice = slice;
368		dev->d_partition = partition;
369		DEBUG("%s offset %lld => %p", disk_fmtdev(dev),
370		    dev->d_offset, od);
371	}
372	return (rc);
373}
374
375int
376disk_close(struct disk_devdesc *dev)
377{
378	struct open_disk *od;
379
380	od = (struct open_disk *)dev->d_opendata;
381	DEBUG("%s closed => %p [%d]", disk_fmtdev(dev), od, od->rcnt);
382	if (od->flags & DISK_F_NOCACHE) {
383		ptable_close(od->table);
384		free(od);
385	}
386	return (0);
387}
388
389void
390disk_cleanup(const struct devsw *d_dev)
391{
392#ifdef DISK_DEBUG
393	struct disk_devdesc dev;
394#endif
395	struct dentry *entry, *tmp;
396
397	STAILQ_FOREACH_SAFE(entry, &opened_disks, entry, tmp) {
398		if (entry->d_dev != d_dev)
399			continue;
400		entry->od->rcnt--;
401#ifdef DISK_DEBUG
402		dev.d_dev = (struct devsw *)entry->d_dev;
403		dev.d_unit = entry->d_unit;
404		dev.d_slice = entry->d_slice;
405		dev.d_partition = entry->d_partition;
406		DEBUG("%s was freed => %p [%d]", disk_fmtdev(&dev),
407		    entry->od, entry->od->rcnt);
408#endif
409		STAILQ_REMOVE(&opened_disks, entry, dentry, entry);
410		if (entry->od->rcnt < 1) {
411			if (entry->od->table != NULL)
412				ptable_close(entry->od->table);
413			free(entry->od);
414		}
415		free(entry);
416	}
417}
418
419char*
420disk_fmtdev(struct disk_devdesc *dev)
421{
422	static char buf[128];
423	char *cp;
424
425	cp = buf + sprintf(buf, "%s%d", dev->d_dev->dv_name, dev->d_unit);
426	if (dev->d_slice >= 0) {
427#ifdef LOADER_GPT_SUPPORT
428		if (dev->d_partition == 255) {
429			sprintf(cp, "p%d:", dev->d_slice);
430			return (buf);
431		} else
432#endif
433#ifdef LOADER_MBR_SUPPORT
434			cp += sprintf(cp, "s%d", dev->d_slice);
435#endif
436	}
437	if (dev->d_partition >= 0)
438		cp += sprintf(cp, "%c", dev->d_partition + 'a');
439	strcat(cp, ":");
440	return (buf);
441}
442
443int
444disk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path)
445{
446	int unit, slice, partition;
447	const char *np;
448	char *cp;
449
450	np = devspec;
451	unit = slice = partition = -1;
452	if (*np != '\0' && *np != ':') {
453		unit = strtol(np, &cp, 10);
454		if (cp == np)
455			return (EUNIT);
456#ifdef LOADER_GPT_SUPPORT
457		if (*cp == 'p') {
458			np = cp + 1;
459			slice = strtol(np, &cp, 10);
460			if (np == cp)
461				return (ESLICE);
462			/* we don't support nested partitions on GPT */
463			if (*cp != '\0' && *cp != ':')
464				return (EINVAL);
465			partition = 255;
466		} else
467#endif
468#ifdef LOADER_MBR_SUPPORT
469		if (*cp == 's') {
470			np = cp + 1;
471			slice = strtol(np, &cp, 10);
472			if (np == cp)
473				return (ESLICE);
474		}
475#endif
476		if (*cp != '\0' && *cp != ':') {
477			partition = *cp - 'a';
478			if (partition < 0)
479				return (EPART);
480			cp++;
481		}
482	} else
483		return (EINVAL);
484
485	if (*cp != '\0' && *cp != ':')
486		return (EINVAL);
487	dev->d_unit = unit;
488	dev->d_slice = slice;
489	dev->d_partition = partition;
490	if (path != NULL)
491		*path = (*cp == '\0') ? cp: cp + 1;
492	return (0);
493}
494