biosdisk.c revision 332056
1/*-
2 * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/11/stand/i386/libi386/biosdisk.c 332056 2018-04-05 00:41:33Z kevans $");
30
31/*
32 * BIOS disk device handling.
33 *
34 * Ideas and algorithms from:
35 *
36 * - NetBSD libi386/biosdisk.c
37 * - FreeBSD biosboot/disk.c
38 *
39 */
40
41#include <sys/disk.h>
42#include <sys/limits.h>
43#include <stand.h>
44#include <machine/bootinfo.h>
45#include <stdarg.h>
46
47#include <bootstrap.h>
48#include <btxv86.h>
49#include <edd.h>
50#include "disk.h"
51#include "libi386.h"
52
53#ifdef LOADER_GELI_SUPPORT
54#include "cons.h"
55#include "drv.h"
56#include "gpt.h"
57#include "part.h"
58#include <uuid.h>
/*
 * Partition table entry as walked by the GELI probing code in bd_open().
 * NOTE(review): this looks like a local copy of a definition that is
 * private to the partition-table code -- confirm the two stay in sync.
 */
struct pentry {
	struct ptable_entry	part;	/* index/start/end are read in bd_open() */
	uint64_t		flags;
	/* Scheme-specific partition type; one member per table scheme. */
	union {
		uint8_t bsd;
		uint8_t	mbr;
		uuid_t	gpt;
		uint16_t vtoc8;
	} type;
	STAILQ_ENTRY(pentry)	entry;	/* linkage on ptable.entries */
};
/*
 * Partition table handle as returned by ptable_open(); bd_open() reads
 * the type and iterates the entries list.
 */
struct ptable {
	enum ptable_type	type;
	uint16_t		sectorsize;
	uint64_t		sectors;

	STAILQ_HEAD(, pentry)	entries;	/* list of struct pentry */
};
77
78#include "geliboot.c"
79#endif /* LOADER_GELI_SUPPORT */
80
81CTASSERT(sizeof(struct i386_devdesc) >= sizeof(struct disk_devdesc));
82
83#define BIOS_NUMDRIVES		0x475
84#define BIOSDISK_SECSIZE	512
85#define BUFSIZE			(1 * BIOSDISK_SECSIZE)
86
87#define DT_ATAPI		0x10		/* disk type for ATAPI floppies */
88#define WDMAJOR			0		/* major numbers for devices we frontend for */
89#define WFDMAJOR		1
90#define FDMAJOR			2
91#define DAMAJOR			4
92
93#ifdef DISK_DEBUG
94# define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
95#else
96# define DEBUG(fmt, args...)
97#endif
98
99/*
100 * List of BIOS devices, translation from disk unit number to
101 * BIOS unit number.
102 */
/* Values stored in bdinfo.bd_flags. */
#define	BD_MODEINT13	0x0000
#define	BD_MODEEDD1	0x0001
#define	BD_MODEEDD3	0x0002
#define	BD_MODEMASK	0x0003
#define	BD_FLOPPY	0x0004

/* Per-drive state collected by bd_int13probe() and updated on open/close. */
struct bdinfo {
	int		bd_unit;	/* BIOS unit number */
	int		bd_cyl;		/* BIOS geometry: cylinders */
	int		bd_hds;		/* BIOS geometry: heads */
	int		bd_sec;		/* BIOS geometry: sectors per track */
	int		bd_flags;	/* BD_* bits above */
	int		bd_type;	/* BIOS 'drive type' (floppy only) */
	uint16_t	bd_sectorsize;	/* Sector size */
	uint64_t	bd_sectors;	/* Disk size */
	int		bd_open;	/* reference counter */
	void		*bd_bcache;	/* buffer cache data */
};

/* Table indexed by our private disk unit number. */
static struct bdinfo bdinfo[MAXBDDEV];

/* Number of valid entries in bdinfo[]. */
static int nbdinfo = 0;
122
123#define	BD(dev)		(bdinfo[(dev)->d_unit])
124
125static void bd_io_workaround(struct disk_devdesc *dev);
126
127static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks,
128    caddr_t dest);
129static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks,
130    caddr_t dest);
131static int bd_int13probe(struct bdinfo *bd);
132
133static int bd_init(void);
134static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
135    char *buf, size_t *rsize);
136static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size,
137    char *buf, size_t *rsize);
138static int bd_open(struct open_file *f, ...);
139static int bd_close(struct open_file *f);
140static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
141static int bd_print(int verbose);
142
143#ifdef LOADER_GELI_SUPPORT
/* Result of tasting a slice for GELI metadata, cached per unit/slice. */
enum isgeli {
	ISGELI_UNKNOWN,		/* not tasted yet (zero-initialised default) */
	ISGELI_NO,		/* tasted, no GELI found */
	ISGELI_YES		/* GELI provider attached; reads are decrypted */
};
/*
 * Indexed as [d_unit][slice index].
 * NOTE(review): bd_open()/bd_read() also index this with dev->d_slice,
 * which is -1 for whole-disk opens -- confirm that is intended.
 */
static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS];
150
151int bios_read(void *, void *, off_t off, void *buf, size_t bytes);
152#endif /* LOADER_GELI_SUPPORT */
153
/*
 * Device switch entry for BIOS disks.  The initializer is positional:
 * the device name (printed via dv_name in bd_print()), the device type,
 * then the bd_* handlers defined in this file.
 * NOTE(review): the trailing NULL presumably leaves an optional hook
 * unset -- confirm against the struct devsw declaration.
 */
struct devsw biosdisk = {
	"disk",
	DEVT_DISK,
	bd_init,
	bd_strategy,
	bd_open,
	bd_close,
	bd_ioctl,
	bd_print,
	NULL
};
165
166/*
167 * Translate between BIOS device numbers and our private unit numbers.
168 */
169int
170bd_bios2unit(int biosdev)
171{
172	int i;
173
174	DEBUG("looking for bios device 0x%x", biosdev);
175	for (i = 0; i < nbdinfo; i++) {
176		DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit);
177		if (bdinfo[i].bd_unit == biosdev)
178			return (i);
179	}
180	return (-1);
181}
182
183int
184bd_unit2bios(int unit)
185{
186
187	if ((unit >= 0) && (unit < nbdinfo))
188		return (bdinfo[unit].bd_unit);
189	return (-1);
190}
191
192/*
193 * Quiz the BIOS for disk devices, save a little info about them.
194 */
195static int
196bd_init(void)
197{
198	int base, unit, nfd = 0;
199
200#ifdef LOADER_GELI_SUPPORT
201	geli_init();
202#endif
203	/* sequence 0, 0x80 */
204	for (base = 0; base <= 0x80; base += 0x80) {
205		for (unit = base; (nbdinfo < MAXBDDEV); unit++) {
206#ifndef VIRTUALBOX
207			/*
208			 * Check the BIOS equipment list for number
209			 * of fixed disks.
210			 */
211			if(base == 0x80 &&
212			    (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES)))
213				break;
214#endif
215			bdinfo[nbdinfo].bd_open = 0;
216			bdinfo[nbdinfo].bd_bcache = NULL;
217			bdinfo[nbdinfo].bd_unit = unit;
218			bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0;
219			if (!bd_int13probe(&bdinfo[nbdinfo]))
220				break;
221
222			/* XXX we need "disk aliases" to make this simpler */
223			printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ?
224			    ('A' + unit): ('C' + unit - 0x80), nbdinfo);
225			nbdinfo++;
226			if (base == 0x80)
227				nfd++;
228		}
229	}
230	bcache_add_dev(nbdinfo);
231	return(0);
232}
233
234/*
235 * Try to detect a device supported by the legacy int13 BIOS
236 */
237static int
238bd_int13probe(struct bdinfo *bd)
239{
240	struct edd_params params;
241	int ret = 1;	/* assume success */
242
243	v86.ctl = V86_FLAGS;
244	v86.addr = 0x13;
245	v86.eax = 0x800;
246	v86.edx = bd->bd_unit;
247	v86int();
248
249	/* Don't error out if we get bad sector number, try EDD as well */
250	if (V86_CY(v86.efl) ||	/* carry set */
251	    (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f))	/* unit # bad */
252		return (0);	/* skip device */
253
254	if ((v86.ecx & 0x3f) == 0) /* absurd sector number */
255		ret = 0;	/* set error */
256
257	/* Convert max cyl # -> # of cylinders */
258	bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
259	/* Convert max head # -> # of heads */
260	bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1;
261	bd->bd_sec = v86.ecx & 0x3f;
262	bd->bd_type = v86.ebx & 0xff;
263	bd->bd_flags |= BD_MODEINT13;
264
265	/* Calculate sectors count from the geometry */
266	bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec;
267	bd->bd_sectorsize = BIOSDISK_SECSIZE;
268	DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl,
269	    bd->bd_hds, bd->bd_sec);
270
271	/* Determine if we can use EDD with this device. */
272	v86.ctl = V86_FLAGS;
273	v86.addr = 0x13;
274	v86.eax = 0x4100;
275	v86.edx = bd->bd_unit;
276	v86.ebx = 0x55aa;
277	v86int();
278	if (V86_CY(v86.efl) ||	/* carry set */
279	    (v86.ebx & 0xffff) != 0xaa55 || /* signature */
280	    (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
281		return (ret);	/* return code from int13 AH=08 */
282
283	/* EDD supported */
284	bd->bd_flags |= BD_MODEEDD1;
285	if ((v86.eax & 0xff00) >= 0x3000)
286		bd->bd_flags |= BD_MODEEDD3;
287	/* Get disk params */
288	params.len = sizeof(struct edd_params);
289	v86.ctl = V86_FLAGS;
290	v86.addr = 0x13;
291	v86.eax = 0x4800;
292	v86.edx = bd->bd_unit;
293	v86.ds = VTOPSEG(&params);
294	v86.esi = VTOPOFF(&params);
295	v86int();
296	if (!V86_CY(v86.efl)) {
297		uint64_t total;
298
299		/*
300		 * Sector size must be a multiple of 512 bytes.
301		 * An alternate test would be to check power of 2,
302		 * powerof2(params.sector_size).
303		 */
304		if (params.sector_size % BIOSDISK_SECSIZE)
305			bd->bd_sectorsize = BIOSDISK_SECSIZE;
306		else
307			bd->bd_sectorsize = params.sector_size;
308
309		total = bd->bd_sectorsize * params.sectors;
310		if (params.sectors != 0) {
311			/* Only update if we did not overflow. */
312			if (total > params.sectors)
313				bd->bd_sectors = params.sectors;
314		}
315
316		total = (uint64_t)params.cylinders *
317		    params.heads * params.sectors_per_track;
318		if (bd->bd_sectors < total)
319			bd->bd_sectors = total;
320
321		ret = 1;
322	}
323	DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u",
324	    bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize);
325	return (ret);
326}
327
328/*
329 * Print information about disks
330 */
331static int
332bd_print(int verbose)
333{
334	static char line[80];
335	struct disk_devdesc dev;
336	int i, ret = 0;
337
338	if (nbdinfo == 0)
339		return (0);
340
341	printf("%s devices:", biosdisk.dv_name);
342	if ((ret = pager_output("\n")) != 0)
343		return (ret);
344
345	for (i = 0; i < nbdinfo; i++) {
346		snprintf(line, sizeof(line),
347		    "    disk%d:   BIOS drive %c (%ju X %u):\n", i,
348		    (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit):
349		    ('C' + bdinfo[i].bd_unit - 0x80),
350		    (uintmax_t)bdinfo[i].bd_sectors,
351		    bdinfo[i].bd_sectorsize);
352		if ((ret = pager_output(line)) != 0)
353			break;
354		dev.d_dev = &biosdisk;
355		dev.d_unit = i;
356		dev.d_slice = -1;
357		dev.d_partition = -1;
358		if (disk_open(&dev,
359		    bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors,
360		    bdinfo[i].bd_sectorsize) == 0) {
361			snprintf(line, sizeof(line), "    disk%d", i);
362			ret = disk_print(&dev, line, verbose);
363			disk_close(&dev);
364			if (ret != 0)
365			    return (ret);
366		}
367	}
368	return (ret);
369}
370
371/*
372 * Attempt to open the disk described by (dev) for use by (f).
373 *
374 * Note that the philosophy here is "give them exactly what
375 * they ask for".  This is necessary because being too "smart"
376 * about what the user might want leads to complications.
377 * (eg. given no slice or partition value, with a disk that is
378 *  sliced - are they after the first BSD slice, or the DOS
379 *  slice before it?)
380 */
381static int
382bd_open(struct open_file *f, ...)
383{
384	struct disk_devdesc *dev, rdev;
385	struct disk_devdesc disk;
386	int err, g_err;
387	va_list ap;
388	uint64_t size;
389
390	va_start(ap, f);
391	dev = va_arg(ap, struct disk_devdesc *);
392	va_end(ap);
393
394	if (dev->d_unit < 0 || dev->d_unit >= nbdinfo)
395		return (EIO);
396	BD(dev).bd_open++;
397	if (BD(dev).bd_bcache == NULL)
398	    BD(dev).bd_bcache = bcache_allocate();
399
400	/*
401	 * Read disk size from partition.
402	 * This is needed to work around buggy BIOS systems returning
403	 * wrong (truncated) disk media size.
404	 * During bd_probe() we tested if the mulitplication of bd_sectors
405	 * would overflow so it should be safe to perform here.
406	 */
407	disk.d_dev = dev->d_dev;
408	disk.d_type = dev->d_type;
409	disk.d_unit = dev->d_unit;
410	disk.d_opendata = NULL;
411	disk.d_slice = -1;
412	disk.d_partition = -1;
413	disk.d_offset = 0;
414	if (disk_open(&disk, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
415	    BD(dev).bd_sectorsize) == 0) {
416
417		if (disk_ioctl(&disk, DIOCGMEDIASIZE, &size) == 0) {
418			size /= BD(dev).bd_sectorsize;
419			if (size > BD(dev).bd_sectors)
420				BD(dev).bd_sectors = size;
421		}
422		disk_close(&disk);
423	}
424
425	err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
426	    BD(dev).bd_sectorsize);
427
428#ifdef LOADER_GELI_SUPPORT
429	static char gelipw[GELI_PW_MAXLEN];
430	char *passphrase;
431
432	if (err)
433		return (err);
434
435	/* if we already know there is no GELI, skip the rest */
436	if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_UNKNOWN)
437		return (err);
438
439	struct dsk dskp;
440	struct ptable *table = NULL;
441	struct ptable_entry part;
442	struct pentry *entry;
443	int geli_part = 0;
444
445	dskp.drive = bd_unit2bios(dev->d_unit);
446	dskp.type = dev->d_type;
447	dskp.unit = dev->d_unit;
448	dskp.slice = dev->d_slice;
449	dskp.part = dev->d_partition;
450	dskp.start = dev->d_offset;
451
452	memcpy(&rdev, dev, sizeof(rdev));
453	/* to read the GPT table, we need to read the first sector */
454	rdev.d_offset = 0;
455	/* We need the LBA of the end of the partition */
456	table = ptable_open(&rdev, BD(dev).bd_sectors,
457	    BD(dev).bd_sectorsize, ptblread);
458	if (table == NULL) {
459		DEBUG("Can't read partition table");
460		/* soft failure, return the exit status of disk_open */
461		return (err);
462	}
463
464	if (table->type == PTABLE_GPT)
465		dskp.part = 255;
466
467	STAILQ_FOREACH(entry, &table->entries, entry) {
468		dskp.slice = entry->part.index;
469		dskp.start = entry->part.start;
470		if (is_geli(&dskp) == 0) {
471			geli_status[dev->d_unit][dskp.slice] = ISGELI_YES;
472			return (0);
473		}
474		if (geli_taste(bios_read, &dskp,
475		    entry->part.end - entry->part.start) == 0) {
476			if (geli_havekey(&dskp) == 0) {
477				geli_status[dev->d_unit][dskp.slice] = ISGELI_YES;
478				geli_part++;
479				continue;
480			}
481			if ((passphrase = getenv("kern.geom.eli.passphrase"))
482			    != NULL) {
483				/* Use the cached passphrase */
484				bcopy(passphrase, &gelipw, GELI_PW_MAXLEN);
485			}
486			if (geli_passphrase(gelipw, dskp.unit, 'p',
487				    (dskp.slice > 0 ? dskp.slice : dskp.part),
488				    &dskp) == 0) {
489				setenv("kern.geom.eli.passphrase", gelipw, 1);
490				bzero(gelipw, sizeof(gelipw));
491				geli_status[dev->d_unit][dskp.slice] = ISGELI_YES;
492				geli_part++;
493				continue;
494			}
495		} else
496			geli_status[dev->d_unit][dskp.slice] = ISGELI_NO;
497	}
498
499	/* none of the partitions on this disk have GELI */
500	if (geli_part == 0) {
501		/* found no GELI */
502		geli_status[dev->d_unit][dev->d_slice] = ISGELI_NO;
503	}
504#endif /* LOADER_GELI_SUPPORT */
505
506	return (err);
507}
508
509static int
510bd_close(struct open_file *f)
511{
512	struct disk_devdesc *dev;
513
514	dev = (struct disk_devdesc *)f->f_devdata;
515	BD(dev).bd_open--;
516	if (BD(dev).bd_open == 0) {
517	    bcache_free(BD(dev).bd_bcache);
518	    BD(dev).bd_bcache = NULL;
519	}
520	return (disk_close(dev));
521}
522
523static int
524bd_ioctl(struct open_file *f, u_long cmd, void *data)
525{
526	struct disk_devdesc *dev;
527	int rc;
528
529	dev = (struct disk_devdesc *)f->f_devdata;
530
531	rc = disk_ioctl(dev, cmd, data);
532	if (rc != ENOTTY)
533		return (rc);
534
535	switch (cmd) {
536	case DIOCGSECTORSIZE:
537		*(u_int *)data = BD(dev).bd_sectorsize;
538		break;
539	case DIOCGMEDIASIZE:
540		*(uint64_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
541		break;
542	default:
543		return (ENOTTY);
544	}
545	return (0);
546}
547
548static int
549bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
550    char *buf, size_t *rsize)
551{
552	struct bcache_devdata bcd;
553	struct disk_devdesc *dev;
554
555	dev = (struct disk_devdesc *)devdata;
556	bcd.dv_strategy = bd_realstrategy;
557	bcd.dv_devdata = devdata;
558	bcd.dv_cache = BD(dev).bd_bcache;
559	return (bcache_strategy(&bcd, rw, dblk + dev->d_offset,
560	    size, buf, rsize));
561}
562
563static int
564bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
565    char *buf, size_t *rsize)
566{
567    struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
568    uint64_t		disk_blocks;
569    int			blks, rc;
570#ifdef BD_SUPPORT_FRAGS /* XXX: sector size */
571    char		fragbuf[BIOSDISK_SECSIZE];
572    size_t		fragsize;
573
574    fragsize = size % BIOSDISK_SECSIZE;
575#else
576    if (size % BD(dev).bd_sectorsize)
577	panic("bd_strategy: %d bytes I/O not multiple of block size", size);
578#endif
579
580    DEBUG("open_disk %p", dev);
581
582    /*
583     * Check the value of the size argument. We do have quite small
584     * heap (64MB), but we do not know good upper limit, so we check against
585     * INT_MAX here. This will also protect us against possible overflows
586     * while translating block count to bytes.
587     */
588    if (size > INT_MAX) {
589	DEBUG("too large read: %zu bytes", size);
590	return (EIO);
591    }
592
593    blks = size / BD(dev).bd_sectorsize;
594    if (dblk > dblk + blks)
595	return (EIO);
596
597    if (rsize)
598	*rsize = 0;
599
600    /* Get disk blocks, this value is either for whole disk or for partition */
601    if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks)) {
602	/* DIOCGMEDIASIZE does return bytes. */
603        disk_blocks /= BD(dev).bd_sectorsize;
604    } else {
605	/* We should not get here. Just try to survive. */
606	disk_blocks = BD(dev).bd_sectors - dev->d_offset;
607    }
608
609    /* Validate source block address. */
610    if (dblk < dev->d_offset || dblk >= dev->d_offset + disk_blocks)
611	return (EIO);
612
613    /*
614     * Truncate if we are crossing disk or partition end.
615     */
616    if (dblk + blks >= dev->d_offset + disk_blocks) {
617	blks = dev->d_offset + disk_blocks - dblk;
618	size = blks * BD(dev).bd_sectorsize;
619	DEBUG("short read %d", blks);
620    }
621
622    switch (rw & F_MASK) {
623    case F_READ:
624	DEBUG("read %d from %lld to %p", blks, dblk, buf);
625
626	if (blks && (rc = bd_read(dev, dblk, blks, buf))) {
627	    /* Filter out floppy controller errors */
628	    if (BD(dev).bd_flags != BD_FLOPPY || rc != 0x20) {
629		printf("read %d from %lld to %p, error: 0x%x", blks, dblk,
630		    buf, rc);
631	    }
632	    return (EIO);
633	}
634#ifdef BD_SUPPORT_FRAGS /* XXX: sector size */
635	DEBUG("bd_strategy: frag read %d from %d+%d to %p",
636	    fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE));
637	if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) {
638	    DEBUG("frag read error");
639	    return(EIO);
640	}
641	bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize);
642#endif
643	break;
644    case F_WRITE :
645	DEBUG("write %d from %d to %p", blks, dblk, buf);
646
647	if (blks && bd_write(dev, dblk, blks, buf)) {
648	    DEBUG("write error");
649	    return (EIO);
650	}
651#ifdef BD_SUPPORT_FRAGS
652	if(fragsize) {
653	    DEBUG("Attempted to write a frag");
654	    return (EIO);
655	}
656#endif
657	break;
658    default:
659	/* DO NOTHING */
660	return (EROFS);
661    }
662
663    if (rsize)
664	*rsize = size;
665    return (0);
666}
667
668static int
669bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
670    int write)
671{
672    static struct edd_packet packet;
673
674    packet.len = sizeof(struct edd_packet);
675    packet.count = blks;
676    packet.off = VTOPOFF(dest);
677    packet.seg = VTOPSEG(dest);
678    packet.lba = dblk;
679    v86.ctl = V86_FLAGS;
680    v86.addr = 0x13;
681    if (write)
682	/* Should we Write with verify ?? 0x4302 ? */
683	v86.eax = 0x4300;
684    else
685	v86.eax = 0x4200;
686    v86.edx = BD(dev).bd_unit;
687    v86.ds = VTOPSEG(&packet);
688    v86.esi = VTOPOFF(&packet);
689    v86int();
690    if (V86_CY(v86.efl))
691	return (v86.eax >> 8);
692    return (0);
693}
694
695static int
696bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
697    int write)
698{
699    u_int	x, bpc, cyl, hd, sec;
700
701    bpc = BD(dev).bd_sec * BD(dev).bd_hds;	/* blocks per cylinder */
702    x = dblk;
703    cyl = x / bpc;			/* block # / blocks per cylinder */
704    x %= bpc;				/* block offset into cylinder */
705    hd = x / BD(dev).bd_sec;		/* offset / blocks per track */
706    sec = x % BD(dev).bd_sec;		/* offset into track */
707
708    /* correct sector number for 1-based BIOS numbering */
709    sec++;
710
711    if (cyl > 1023)
712	/* CHS doesn't support cylinders > 1023. */
713	return (1);
714
715    v86.ctl = V86_FLAGS;
716    v86.addr = 0x13;
717    if (write)
718	v86.eax = 0x300 | blks;
719    else
720	v86.eax = 0x200 | blks;
721    v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec;
722    v86.edx = (hd << 8) | BD(dev).bd_unit;
723    v86.es = VTOPSEG(dest);
724    v86.ebx = VTOPOFF(dest);
725    v86int();
726    if (V86_CY(v86.efl))
727	return (v86.eax >> 8);
728    return (0);
729}
730
731static void
732bd_io_workaround(struct disk_devdesc *dev)
733{
734	uint8_t buf[8 * 1024];
735
736	bd_edd_io(dev, 0xffffffff, 1, (caddr_t)buf, 0);
737}
738
739
740static int
741bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write)
742{
743    u_int	x, sec, result, resid, retry, maxfer;
744    caddr_t	p, xp, bbuf;
745
746    /* Just in case some idiot actually tries to read/write -1 blocks... */
747    if (blks < 0)
748	return (-1);
749
750    resid = blks;
751    p = dest;
752
753    /*
754     * Workaround for a problem with some HP ProLiant BIOS failing to work out
755     * the boot disk after installation. hrs and kuriyama discovered this
756     * problem with an HP ProLiant DL320e Gen 8 with a 3TB HDD, and discovered
757     * that an int13h call seems to cause a buffer overrun in the bios. The
758     * problem is alleviated by doing an extra read before the buggy read. It
759     * is not immediately known whether other models are similarly affected.
760     */
761    if (dblk >= 0x100000000)
762	bd_io_workaround(dev);
763
764    /* Decide whether we have to bounce */
765    if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 &&
766	(VTOP(dest) >> 16) != (VTOP(dest +
767	blks * BD(dev).bd_sectorsize) >> 16))) {
768
769	/*
770	 * There is a 64k physical boundary somewhere in the
771	 * destination buffer, or the destination buffer is above
772	 * first 1MB of physical memory so we have to arrange a
773	 * suitable bounce buffer.  Allocate a buffer twice as large
774	 * as we need to.  Use the bottom half unless there is a break
775	 * there, in which case we use the top half.
776	 */
777	x = V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize;
778	x = min(x, (unsigned)blks);
779	bbuf = PTOV(V86_IO_BUFFER);
780	maxfer = x;		/* limit transfers to bounce region size */
781    } else {
782	bbuf = NULL;
783	maxfer = 0;
784    }
785
786    while (resid > 0) {
787	/*
788	 * Play it safe and don't cross track boundaries.
789	 * (XXX this is probably unnecessary)
790	 */
791	sec = dblk % BD(dev).bd_sec;	/* offset into track */
792	x = min(BD(dev).bd_sec - sec, resid);
793	if (maxfer > 0)
794	    x = min(x, maxfer);		/* fit bounce buffer */
795
796	/* where do we transfer to? */
797	xp = bbuf == NULL ? p : bbuf;
798
799	/*
800	 * Put your Data In, Put your Data out,
801	 * Put your Data In, and shake it all about
802	 */
803	if (write && bbuf != NULL)
804	    bcopy(p, bbuf, x * BD(dev).bd_sectorsize);
805
806	/*
807	 * Loop retrying the operation a couple of times.  The BIOS
808	 * may also retry.
809	 */
810	for (retry = 0; retry < 3; retry++) {
811	    /* if retrying, reset the drive */
812	    if (retry > 0) {
813		v86.ctl = V86_FLAGS;
814		v86.addr = 0x13;
815		v86.eax = 0;
816		v86.edx = BD(dev).bd_unit;
817		v86int();
818	    }
819
820	    if (BD(dev).bd_flags & BD_MODEEDD1)
821		result = bd_edd_io(dev, dblk, x, xp, write);
822	    else
823		result = bd_chs_io(dev, dblk, x, xp, write);
824	    if (result == 0)
825		break;
826	}
827
828	if (write)
829	    DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x,
830		p, VTOP(p), dblk, result ? "failed" : "ok");
831	else
832	    DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x,
833		dblk, p, VTOP(p), result ? "failed" : "ok");
834	if (result) {
835	    return (result);
836	}
837	if (!write && bbuf != NULL)
838	    bcopy(bbuf, p, x * BD(dev).bd_sectorsize);
839	p += (x * BD(dev).bd_sectorsize);
840	dblk += x;
841	resid -= x;
842    }
843
844/*    hexdump(dest, (blks * BD(dev).bd_sectorsize)); */
845    return(0);
846}
847
848static int
849bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest)
850{
851#ifdef LOADER_GELI_SUPPORT
852	struct dsk dskp;
853	off_t p_off, diff;
854	daddr_t alignlba;
855	int err, n, alignblks;
856	char *tmpbuf;
857
858	/* if we already know there is no GELI, skip the rest */
859	if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_YES)
860		return (bd_io(dev, dblk, blks, dest, 0));
861
862	if (geli_status[dev->d_unit][dev->d_slice] == ISGELI_YES) {
863		/*
864		 * Align reads to DEV_GELIBOOT_BSIZE bytes because partial
865		 * sectors cannot be decrypted. Round the requested LBA down to
866		 * nearest multiple of DEV_GELIBOOT_BSIZE bytes.
867		 */
868		alignlba = rounddown2(dblk * BD(dev).bd_sectorsize,
869		    DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize;
870		/*
871		 * Round number of blocks to read up to nearest multiple of
872		 * DEV_GELIBOOT_BSIZE
873		 */
874		diff = (dblk - alignlba) * BD(dev).bd_sectorsize;
875		alignblks = roundup2(blks * BD(dev).bd_sectorsize + diff,
876		    DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize;
877
878		/*
879		 * If the read is rounded up to a larger size, use a temporary
880		 * buffer here because the buffer provided by the caller may be
881		 * too small.
882		 */
883		if (diff == 0) {
884			tmpbuf = dest;
885		} else {
886			tmpbuf = malloc(alignblks * BD(dev).bd_sectorsize);
887			if (tmpbuf == NULL) {
888				return (-1);
889			}
890		}
891
892		err = bd_io(dev, alignlba, alignblks, tmpbuf, 0);
893		if (err)
894			return (err);
895
896		dskp.drive = bd_unit2bios(dev->d_unit);
897		dskp.type = dev->d_type;
898		dskp.unit = dev->d_unit;
899		dskp.slice = dev->d_slice;
900		dskp.part = dev->d_partition;
901		dskp.start = dev->d_offset;
902
903		/* GELI needs the offset relative to the partition start */
904		p_off = alignlba - dskp.start;
905
906		err = geli_read(&dskp, p_off * BD(dev).bd_sectorsize, (u_char *)tmpbuf,
907		    alignblks * BD(dev).bd_sectorsize);
908		if (err)
909			return (err);
910
911		if (tmpbuf != dest) {
912			bcopy(tmpbuf + diff, dest, blks * BD(dev).bd_sectorsize);
913			free(tmpbuf);
914		}
915		return (0);
916	}
917#endif /* LOADER_GELI_SUPPORT */
918
919	return (bd_io(dev, dblk, blks, dest, 0));
920}
921
922static int
923bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest)
924{
925
926	return (bd_io(dev, dblk, blks, dest, 1));
927}
928
929/*
930 * Return the BIOS geometry of a given "fixed drive" in a format
931 * suitable for the legacy bootinfo structure.  Since the kernel is
932 * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we
933 * prefer to get the information directly, rather than rely on being
934 * able to put it together from information already maintained for
935 * different purposes and for a probably different number of drives.
936 *
937 * For valid drives, the geometry is expected in the format (31..0)
938 * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are
939 * indicated by returning the geometry of a "1.2M" PC-format floppy
940 * disk.  And, incidentally, what is returned is not the geometry as
941 * such but the highest valid cylinder, head, and sector numbers.
942 */
943u_int32_t
944bd_getbigeom(int bunit)
945{
946
947    v86.ctl = V86_FLAGS;
948    v86.addr = 0x13;
949    v86.eax = 0x800;
950    v86.edx = 0x80 + bunit;
951    v86int();
952    if (V86_CY(v86.efl))
953	return 0x4f010f;
954    return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) |
955	   (v86.edx & 0xff00) | (v86.ecx & 0x3f);
956}
957
958/*
959 * Return a suitable dev_t value for (dev).
960 *
961 * In the case where it looks like (dev) is a SCSI disk, we allow the number of
962 * IDE disks to be specified in $num_ide_disks.  There should be a Better Way.
963 */
964int
965bd_getdev(struct i386_devdesc *d)
966{
967    struct disk_devdesc		*dev;
968    int				biosdev;
969    int 			major;
970    int				rootdev;
971    char			*nip, *cp;
972    int				i, unit;
973
974    dev = (struct disk_devdesc *)d;
975    biosdev = bd_unit2bios(dev->d_unit);
976    DEBUG("unit %d BIOS device %d", dev->d_unit, biosdev);
977    if (biosdev == -1)				/* not a BIOS device */
978	return(-1);
979    if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
980	BD(dev).bd_sectorsize) != 0)		/* oops, not a viable device */
981	    return (-1);
982    else
983	disk_close(dev);
984
985    if (biosdev < 0x80) {
986	/* floppy (or emulated floppy) or ATAPI device */
987	if (bdinfo[dev->d_unit].bd_type == DT_ATAPI) {
988	    /* is an ATAPI disk */
989	    major = WFDMAJOR;
990	} else {
991	    /* is a floppy disk */
992	    major = FDMAJOR;
993	}
994    } else {
995	    /* assume an IDE disk */
996	    major = WDMAJOR;
997    }
998    /* default root disk unit number */
999    unit = biosdev & 0x7f;
1000
1001    /* XXX a better kludge to set the root disk unit number */
1002    if ((nip = getenv("root_disk_unit")) != NULL) {
1003	i = strtol(nip, &cp, 0);
1004	/* check for parse error */
1005	if ((cp != nip) && (*cp == 0))
1006	    unit = i;
1007    }
1008
1009    rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition);
1010    DEBUG("dev is 0x%x\n", rootdev);
1011    return(rootdev);
1012}
1013
1014#ifdef LOADER_GELI_SUPPORT
1015int
1016bios_read(void *vdev __unused, void *xpriv, off_t off, void *buf, size_t bytes)
1017{
1018	struct disk_devdesc dev;
1019	struct dsk *priv = xpriv;
1020
1021	dev.d_dev = &biosdisk;
1022	dev.d_type = priv->type;
1023	dev.d_unit = priv->unit;
1024	dev.d_slice = priv->slice;
1025	dev.d_partition = priv->part;
1026	dev.d_offset = priv->start;
1027
1028	off = off / BD(&dev).bd_sectorsize;
1029	/* GELI gives us the offset relative to the partition start */
1030	off += dev.d_offset;
1031	bytes = bytes / BD(&dev).bd_sectorsize;
1032
1033	return (bd_io(&dev, off, bytes, buf, 0));
1034}
1035#endif /* LOADER_GELI_SUPPORT */
1036