biosdisk.c revision 346476
1/*-
2 * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/11/stand/i386/libi386/biosdisk.c 346476 2019-04-21 03:36:05Z kevans $");
30
31/*
32 * BIOS disk device handling.
33 *
34 * Ideas and algorithms from:
35 *
36 * - NetBSD libi386/biosdisk.c
37 * - FreeBSD biosboot/disk.c
38 *
39 */
40
41#include <sys/disk.h>
42#include <sys/limits.h>
43#include <sys/queue.h>
44#include <stand.h>
45#include <machine/bootinfo.h>
46#include <stdarg.h>
47#include <stdbool.h>
48
49#include <bootstrap.h>
50#include <btxv86.h>
51#include <edd.h>
52#include "disk.h"
53#include "libi386.h"
54
/*
 * Physical address that bd_init() dereferences (through PTOV) as an
 * unsigned char; the value read there bounds the hard disk probe loop.
 */
#define	BIOS_NUMDRIVES		0x475
/*
 * Default sector size, and the unit of the block numbers handed to the
 * strategy routines (bd_realstrategy() multiplies dblk by this value).
 */
#define	BIOSDISK_SECSIZE	512
#define	BUFSIZE			(1 * BIOSDISK_SECSIZE)

#define	DT_ATAPI	0x10	/* disk type for ATAPI floppies */
#define	WDMAJOR		0	/* major numbers for devices we frontend for */
#define	WFDMAJOR	1
#define	FDMAJOR		2
#define	DAMAJOR		4
#define	ACDMAJOR	117
#define	CDMAJOR		15

/*
 * DEBUG() prints its message prefixed with the calling function's name and
 * followed by a newline when DISK_DEBUG is defined; otherwise it expands to
 * nothing, so callers must not put side effects in its arguments.
 */
#ifdef DISK_DEBUG
#define	DEBUG(fmt, args...)	printf("%s: " fmt "\n", __func__, ## args)
#else
#define	DEBUG(fmt, args...)
#endif
72
/*
 * Buffer handed to the BIOS in DS:SI by bc_add() for INT 13h AX=4B01h.
 * After the call bc_add() only consumes sp_cylsec and sp_head, as a
 * fallback source of geometry for the CD device.
 * NOTE(review): the layout presumably mirrors the El Torito specification
 * packet; confirm against the specification, including whether the
 * structure needs to be declared packed.
 */
struct specification_packet {
	uint8_t		sp_size;
	uint8_t		sp_bootmedia;
	uint8_t		sp_drive;
	uint8_t		sp_controller;
	uint32_t	sp_lba;
	uint16_t	sp_devicespec;
	uint16_t	sp_buffersegment;
	uint16_t	sp_loadsegment;
	uint16_t	sp_sectorcount;
	uint16_t	sp_cylsec;	/* decoded by bc_add() like INT 13h CX */
	uint8_t		sp_head;	/* bc_add() uses sp_head + 1 as head count */
};
86
/*
 * List of BIOS devices, translation from disk unit number to
 * BIOS unit number.
 *
 * One bdinfo is allocated per probed device and linked into fdinfo,
 * cdinfo or hdinfo; the position of an entry in its list is the unit
 * number the rest of the loader uses for it.
 */
typedef struct bdinfo
{
	STAILQ_ENTRY(bdinfo)	bd_link;	/* link in device list */
	int		bd_unit;	/* BIOS unit number */
	int		bd_cyl;		/* BIOS geometry */
	int		bd_hds;
	int		bd_sec;
	int		bd_flags;
/* Access mode bits; bd_io() uses EDD packet I/O when any EDD bit is set. */
#define	BD_MODEINT13	0x0000
#define	BD_MODEEDD1	0x0001
#define	BD_MODEEDD3	0x0002
#define	BD_MODEEDD	(BD_MODEEDD1 | BD_MODEEDD3)
#define	BD_MODEMASK	0x0003
/* Device class bits, set by fd_init() and bc_add() respectively. */
#define	BD_FLOPPY	0x0004
#define	BD_CDROM	0x0008
/* Set when probing or I/O indicates there is no medium in the drive. */
#define	BD_NO_MEDIA	0x0010
	int		bd_type;	/* BIOS 'drive type' (floppy only) */
	uint16_t	bd_sectorsize;	/* Sector size */
	uint64_t	bd_sectors;	/* Disk size */
	int		bd_open;	/* reference counter */
	void		*bd_bcache;	/* buffer cache data */
} bdinfo_t;
113
/* Transfer direction passed as the "dowrite" argument of the I/O helpers. */
#define	BD_RD		0
#define	BD_WR		1

/*
 * One list per device class (floppy, CD, hard disk).  Entries are only
 * ever appended, so list position doubles as the loader's unit number.
 */
typedef STAILQ_HEAD(bdinfo_list, bdinfo) bdinfo_list_t;
static bdinfo_list_t fdinfo = STAILQ_HEAD_INITIALIZER(fdinfo);
static bdinfo_list_t cdinfo = STAILQ_HEAD_INITIALIZER(cdinfo);
static bdinfo_list_t hdinfo = STAILQ_HEAD_INITIALIZER(hdinfo);

/* Low-level I/O and probing helpers defined later in this file. */
static void bd_io_workaround(bdinfo_t *);
static int bd_io(struct disk_devdesc *, bdinfo_t *, daddr_t, int, caddr_t, int);
static bool bd_int13probe(bdinfo_t *);

/* Entry points referenced by the devsw tables below. */
static int bd_init(void);
static int cd_init(void);
static int fd_init(void);
static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
    char *buf, size_t *rsize);
static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size,
    char *buf, size_t *rsize);
static int bd_open(struct open_file *f, ...);
static int bd_close(struct open_file *f);
static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
static int bd_print(int verbose);
static int cd_print(int verbose);
static int fd_print(int verbose);
static void bd_reset_disk(int);
static int bd_get_diskinfo_std(struct bdinfo *);
141
/*
 * Device switch entry for BIOS floppy drives ("fd").  It shares the open,
 * close, ioctl and strategy routines with the CD and hard disk entries;
 * only the init and print hooks are floppy specific.
 */
struct devsw biosfd = {
	.dv_name = "fd",
	.dv_type = DEVT_FD,
	.dv_init = fd_init,
	.dv_strategy = bd_strategy,
	.dv_open = bd_open,
	.dv_close = bd_close,
	.dv_ioctl = bd_ioctl,
	.dv_print = fd_print,
	.dv_cleanup = NULL
};
153
/*
 * Device switch entry for the BIOS CD device ("cd").  cd_init() does no
 * probing; the device is registered through bc_add().
 */
struct devsw bioscd = {
	.dv_name = "cd",
	.dv_type = DEVT_CD,
	.dv_init = cd_init,
	.dv_strategy = bd_strategy,
	.dv_open = bd_open,
	.dv_close = bd_close,
	.dv_ioctl = bd_ioctl,
	.dv_print = cd_print,
	.dv_cleanup = NULL
};
165
/*
 * Device switch entry for BIOS hard disks ("disk").  This is the only
 * class for which the shared routines go through the disk_*() partition
 * layer (they test for DEVT_DISK).
 */
struct devsw bioshd = {
	.dv_name = "disk",
	.dv_type = DEVT_DISK,
	.dv_init = bd_init,
	.dv_strategy = bd_strategy,
	.dv_open = bd_open,
	.dv_close = bd_close,
	.dv_ioctl = bd_ioctl,
	.dv_print = bd_print,
	.dv_cleanup = NULL
};
177
178static bdinfo_list_t *
179bd_get_bdinfo_list(struct devsw *dev)
180{
181	if (dev->dv_type == DEVT_DISK)
182		return (&hdinfo);
183	if (dev->dv_type == DEVT_CD)
184		return (&cdinfo);
185	if (dev->dv_type == DEVT_FD)
186		return (&fdinfo);
187	return (NULL);
188}
189
190/* XXX this gets called way way too often, investigate */
191static bdinfo_t *
192bd_get_bdinfo(struct devdesc *dev)
193{
194	bdinfo_list_t *bdi;
195	bdinfo_t *bd = NULL;
196	int unit;
197
198	bdi = bd_get_bdinfo_list(dev->d_dev);
199	if (bdi == NULL)
200		return (bd);
201
202	unit = 0;
203	STAILQ_FOREACH(bd, bdi, bd_link) {
204		if (unit == dev->d_unit)
205			return (bd);
206		unit++;
207	}
208	return (bd);
209}
210
211/*
212 * Translate between BIOS device numbers and our private unit numbers.
213 */
214int
215bd_bios2unit(int biosdev)
216{
217	bdinfo_list_t *bdi[] = { &fdinfo, &cdinfo, &hdinfo, NULL };
218	bdinfo_t *bd;
219	int i, unit;
220
221	DEBUG("looking for bios device 0x%x", biosdev);
222	for (i = 0; bdi[i] != NULL; i++) {
223		unit = 0;
224		STAILQ_FOREACH(bd, bdi[i], bd_link) {
225			if (bd->bd_unit == biosdev) {
226				DEBUG("bd unit %d is BIOS device 0x%x", unit,
227				    bd->bd_unit);
228				return (unit);
229			}
230			unit++;
231		}
232	}
233	return (-1);
234}
235
236int
237bd_unit2bios(struct i386_devdesc *dev)
238{
239	bdinfo_list_t *bdi;
240	bdinfo_t *bd;
241	int unit;
242
243	bdi = bd_get_bdinfo_list(dev->dd.d_dev);
244	if (bdi == NULL)
245		return (-1);
246
247	unit = 0;
248	STAILQ_FOREACH(bd, bdi, bd_link) {
249		if (unit == dev->dd.d_unit)
250			return (bd->bd_unit);
251		unit++;
252	}
253	return (-1);
254}
255
256/*
257 * Use INT13 AH=15 - Read Drive Type.
258 */
259static int
260fd_count(void)
261{
262	int drive;
263
264	for (drive = 0; drive < MAXBDDEV; drive++) {
265		bd_reset_disk(drive);
266
267		v86.ctl = V86_FLAGS;
268		v86.addr = 0x13;
269		v86.eax = 0x1500;
270		v86.edx = drive;
271		v86int();
272
273		if (V86_CY(v86.efl))
274			break;
275
276		if ((v86.eax & 0x300) == 0)
277			break;
278	}
279
280	return (drive);
281}
282
283/*
284 * Quiz the BIOS for disk devices, save a little info about them.
285 */
286static int
287fd_init(void)
288{
289	int unit, numfd;
290	bdinfo_t *bd;
291
292	numfd = fd_count();
293	for (unit = 0; unit < numfd; unit++) {
294		if ((bd = calloc(1, sizeof(*bd))) == NULL)
295			break;
296
297		bd->bd_sectorsize = BIOSDISK_SECSIZE;
298		bd->bd_flags = BD_FLOPPY;
299		bd->bd_unit = unit;
300
301		/* Use std diskinfo for floppy drive */
302		if (bd_get_diskinfo_std(bd) != 0) {
303			free(bd);
304			break;
305		}
306		if (bd->bd_sectors == 0)
307			bd->bd_flags |= BD_NO_MEDIA;
308
309		printf("BIOS drive %c: is %s%d\n", ('A' + unit),
310		    biosfd.dv_name, unit);
311
312		STAILQ_INSERT_TAIL(&fdinfo, bd, bd_link);
313	}
314
315	bcache_add_dev(unit);
316	return (0);
317}
318
319static int
320bd_init(void)
321{
322	int base, unit;
323	bdinfo_t *bd;
324
325	base = 0x80;
326	for (unit = 0; unit < *(unsigned char *)PTOV(BIOS_NUMDRIVES); unit++) {
327		/*
328		 * Check the BIOS equipment list for number of fixed disks.
329		 */
330		if ((bd = calloc(1, sizeof(*bd))) == NULL)
331			break;
332		bd->bd_unit = base + unit;
333		if (!bd_int13probe(bd)) {
334			free(bd);
335			break;
336		}
337
338		printf("BIOS drive %c: is %s%d\n", ('C' + unit),
339		    bioshd.dv_name, unit);
340
341		STAILQ_INSERT_TAIL(&hdinfo, bd, bd_link);
342	}
343	bcache_add_dev(unit);
344	return (0);
345}
346
/*
 * CD devices cannot be discovered by quizzing the BIOS; we have to be told
 * which device to use.  This init hook is therefore a no-op that reports
 * success, and the loader registers the device by calling bc_add() with
 * the BIOS device number.
 */
static int
cd_init(void)
{
	int rc = 0;

	return (rc);
}
358
359int
360bc_add(int biosdev)
361{
362	bdinfo_t *bd;
363	struct specification_packet bc_sp;
364	int nbcinfo = 0;
365
366	if (!STAILQ_EMPTY(&cdinfo))
367                return (-1);
368
369        v86.ctl = V86_FLAGS;
370        v86.addr = 0x13;
371        v86.eax = 0x4b01;
372        v86.edx = biosdev;
373        v86.ds = VTOPSEG(&bc_sp);
374        v86.esi = VTOPOFF(&bc_sp);
375        v86int();
376        if ((v86.eax & 0xff00) != 0)
377                return (-1);
378
379	if ((bd = calloc(1, sizeof(*bd))) == NULL)
380		return (-1);
381
382	bd->bd_flags = BD_CDROM;
383        bd->bd_unit = biosdev;
384
385	/*
386	 * Ignore result from bd_int13probe(), we will use local
387	 * workaround below.
388	 */
389	(void)bd_int13probe(bd);
390
391	if (bd->bd_cyl == 0) {
392		bd->bd_cyl = ((bc_sp.sp_cylsec & 0xc0) << 2) +
393		    ((bc_sp.sp_cylsec & 0xff00) >> 8) + 1;
394	}
395	if (bd->bd_hds == 0)
396		bd->bd_hds = bc_sp.sp_head + 1;
397	if (bd->bd_sec == 0)
398		bd->bd_sec = bc_sp.sp_cylsec & 0x3f;
399	if (bd->bd_sectors == 0)
400		bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
401
402	/* Still no size? use 7.961GB */
403	if (bd->bd_sectors == 0)
404		bd->bd_sectors = 4173824;
405
406	STAILQ_INSERT_TAIL(&cdinfo, bd, bd_link);
407        printf("BIOS CD is cd%d\n", nbcinfo);
408        nbcinfo++;
409        bcache_add_dev(nbcinfo);        /* register cd device in bcache */
410        return(0);
411}
412
413/*
414 * Return EDD version or 0 if EDD is not supported on this drive.
415 */
416static int
417bd_check_extensions(int unit)
418{
419	/* do not use ext calls for floppy devices */
420	if (unit < 0x80)
421		return (0);
422
423	/* Determine if we can use EDD with this device. */
424	v86.ctl = V86_FLAGS;
425	v86.addr = 0x13;
426	v86.eax = 0x4100;
427	v86.edx = unit;
428	v86.ebx = 0x55aa;
429	v86int();
430
431	if (V86_CY(v86.efl) ||			/* carry set */
432	    (v86.ebx & 0xffff) != 0xaa55)	/* signature */
433		return (0);
434
435	/* extended disk access functions (AH=42h-44h,47h,48h) supported */
436	if ((v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
437		return (0);
438
439	return ((v86.eax >> 8) & 0xff);
440}
441
442static void
443bd_reset_disk(int unit)
444{
445	/* reset disk */
446	v86.ctl = V86_FLAGS;
447	v86.addr = 0x13;
448	v86.eax = 0;
449	v86.edx = unit;
450	v86int();
451}
452
453/*
454 * Read CHS info. Return 0 on success, error otherwise.
455 */
456static int
457bd_get_diskinfo_std(struct bdinfo *bd)
458{
459	bzero(&v86, sizeof(v86));
460	v86.ctl = V86_FLAGS;
461	v86.addr = 0x13;
462	v86.eax = 0x800;
463	v86.edx = bd->bd_unit;
464	v86int();
465
466	if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0))
467		return ((v86.eax & 0xff00) >> 8);
468
469	/* return custom error on absurd sector number */
470	if ((v86.ecx & 0x3f) == 0)
471		return (0x60);
472
473	bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
474	/* Convert max head # -> # of heads */
475	bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1;
476	bd->bd_sec = v86.ecx & 0x3f;
477	bd->bd_type = v86.ebx;
478	bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
479
480	return (0);
481}
482
483/*
484 * Read EDD info. Return 0 on success, error otherwise.
485 */
486static int
487bd_get_diskinfo_ext(struct bdinfo *bd)
488{
489	struct edd_params params;
490	uint64_t total;
491
492	/* Get disk params */
493	bzero(&params, sizeof(params));
494	params.len = sizeof(params);
495	v86.ctl = V86_FLAGS;
496	v86.addr = 0x13;
497	v86.eax = 0x4800;
498	v86.edx = bd->bd_unit;
499	v86.ds = VTOPSEG(&params);
500	v86.esi = VTOPOFF(&params);
501	v86int();
502
503	if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0))
504		return ((v86.eax & 0xff00) >> 8);
505
506	/*
507	 * Sector size must be a multiple of 512 bytes.
508	 * An alternate test would be to check power of 2,
509	 * powerof2(params.sector_size).
510	 * 16K is largest read buffer we can use at this time.
511	 */
512	if (params.sector_size >= 512 &&
513	    params.sector_size <= 16384 &&
514	    (params.sector_size % BIOSDISK_SECSIZE) == 0)
515		bd->bd_sectorsize = params.sector_size;
516
517	bd->bd_cyl = params.cylinders;
518	bd->bd_hds = params.heads;
519	bd->bd_sec = params.sectors_per_track;
520
521	if (params.sectors != 0) {
522		total = params.sectors;
523	} else {
524		total = (uint64_t)params.cylinders *
525		    params.heads * params.sectors_per_track;
526	}
527	bd->bd_sectors = total;
528
529	return (0);
530}
531
532/*
533 * Try to detect a device supported by the legacy int13 BIOS
534 */
535static bool
536bd_int13probe(bdinfo_t *bd)
537{
538	int edd, ret;
539
540	bd->bd_flags &= ~BD_NO_MEDIA;
541
542	edd = bd_check_extensions(bd->bd_unit);
543	if (edd == 0)
544		bd->bd_flags |= BD_MODEINT13;
545	else if (edd < 0x30)
546		bd->bd_flags |= BD_MODEEDD1;
547	else
548		bd->bd_flags |= BD_MODEEDD3;
549
550	/* Default sector size */
551	bd->bd_sectorsize = BIOSDISK_SECSIZE;
552
553	/*
554	 * Test if the floppy device is present, so we can avoid receiving
555	 * bogus information from bd_get_diskinfo_std().
556	 */
557	if (bd->bd_unit < 0x80) {
558		/* reset disk */
559		bd_reset_disk(bd->bd_unit);
560
561		/* Get disk type */
562		v86.ctl = V86_FLAGS;
563		v86.addr = 0x13;
564		v86.eax = 0x1500;
565		v86.edx = bd->bd_unit;
566		v86int();
567		if (V86_CY(v86.efl) || (v86.eax & 0x300) == 0)
568			return (false);
569	}
570
571	ret = 1;
572	if (edd != 0)
573		ret = bd_get_diskinfo_ext(bd);
574	if (ret != 0 || bd->bd_sectors == 0)
575		ret = bd_get_diskinfo_std(bd);
576
577	if (ret != 0 && bd->bd_unit < 0x80) {
578		/* Set defaults for 1.44 floppy */
579		bd->bd_cyl = 80;
580		bd->bd_hds = 2;
581		bd->bd_sec = 18;
582		bd->bd_sectors = 2880;
583		/* Since we are there, there most likely is no media */
584		bd->bd_flags |= BD_NO_MEDIA;
585		ret = 0;
586	}
587
588	if (ret != 0) {
589		/* CD is special case, bc_add() has its own fallback. */
590		if ((bd->bd_flags & BD_CDROM) != 0)
591			return (true);
592
593		if (bd->bd_sectors != 0 && edd != 0) {
594			bd->bd_sec = 63;
595			bd->bd_hds = 255;
596			bd->bd_cyl =
597			    (bd->bd_sectors + bd->bd_sec * bd->bd_hds - 1) /
598			    bd->bd_sec * bd->bd_hds;
599		} else {
600			const char *dv_name;
601
602			if ((bd->bd_flags & BD_FLOPPY) != 0)
603				dv_name = biosfd.dv_name;
604			else if ((bd->bd_flags & BD_CDROM) != 0)
605				dv_name = bioscd.dv_name;
606			else
607				dv_name = bioshd.dv_name;
608
609			printf("Can not get information about %s unit %#x\n",
610			    dv_name, bd->bd_unit);
611			return (false);
612		}
613	}
614
615	if (bd->bd_sec == 0)
616		bd->bd_sec = 63;
617	if (bd->bd_hds == 0)
618		bd->bd_hds = 255;
619
620	if (bd->bd_sectors == 0)
621		bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
622
623	DEBUG("unit 0x%x geometry %d/%d/%d\n", bd->bd_unit, bd->bd_cyl,
624	    bd->bd_hds, bd->bd_sec);
625
626	return (true);
627}
628
629static int
630bd_count(bdinfo_list_t *bdi)
631{
632	bdinfo_t *bd;
633	int i;
634
635	i = 0;
636	STAILQ_FOREACH(bd, bdi, bd_link)
637		i++;
638	return (i);
639}
640
641/*
642 * Print information about disks
643 */
644static int
645bd_print_common(struct devsw *dev, bdinfo_list_t *bdi, int verbose)
646{
647	char line[80];
648	struct disk_devdesc devd;
649	bdinfo_t *bd;
650	int i, ret = 0;
651	char drive;
652
653	if (STAILQ_EMPTY(bdi))
654		return (0);
655
656	printf("%s devices:", dev->dv_name);
657	if ((ret = pager_output("\n")) != 0)
658		return (ret);
659
660	i = -1;
661	STAILQ_FOREACH(bd, bdi, bd_link) {
662		i++;
663
664		switch (dev->dv_type) {
665		case DEVT_FD:
666			drive = 'A';
667			break;
668		case DEVT_CD:
669			drive = 'C' + bd_count(&hdinfo);
670			break;
671		default:
672			drive = 'C';
673			break;
674		}
675
676		snprintf(line, sizeof(line),
677		    "    %s%d:   BIOS drive %c (%s%ju X %u):\n",
678		    dev->dv_name, i, drive + i,
679		    (bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA ?
680		    "no media, " : "",
681		    (uintmax_t)bd->bd_sectors,
682		    bd->bd_sectorsize);
683		if ((ret = pager_output(line)) != 0)
684			break;
685
686		if ((bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
687			continue;
688
689		if (dev->dv_type != DEVT_DISK)
690			continue;
691
692		devd.dd.d_dev = dev;
693		devd.dd.d_unit = i;
694		devd.d_slice = -1;
695		devd.d_partition = -1;
696		if (disk_open(&devd,
697		    bd->bd_sectorsize * bd->bd_sectors,
698		    bd->bd_sectorsize) == 0) {
699			snprintf(line, sizeof(line), "    %s%d",
700			    dev->dv_name, i);
701			ret = disk_print(&devd, line, verbose);
702			disk_close(&devd);
703			if (ret != 0)
704				break;
705		}
706	}
707	return (ret);
708}
709
710static int
711fd_print(int verbose)
712{
713	return (bd_print_common(&biosfd, &fdinfo, verbose));
714}
715
716static int
717bd_print(int verbose)
718{
719	return (bd_print_common(&bioshd, &hdinfo, verbose));
720}
721
722static int
723cd_print(int verbose)
724{
725	return (bd_print_common(&bioscd, &cdinfo, verbose));
726}
727
728/*
729 * Read disk size from partition.
730 * This is needed to work around buggy BIOS systems returning
731 * wrong (truncated) disk media size.
732 * During bd_probe() we tested if the multiplication of bd_sectors
733 * would overflow so it should be safe to perform here.
734 */
735static uint64_t
736bd_disk_get_sectors(struct disk_devdesc *dev)
737{
738	bdinfo_t *bd;
739	struct disk_devdesc disk;
740	uint64_t size;
741
742	bd = bd_get_bdinfo(&dev->dd);
743	if (bd == NULL)
744		return (0);
745
746	disk.dd.d_dev = dev->dd.d_dev;
747	disk.dd.d_unit = dev->dd.d_unit;
748	disk.d_slice = -1;
749	disk.d_partition = -1;
750	disk.d_offset = 0;
751
752	size = bd->bd_sectors * bd->bd_sectorsize;
753	if (disk_open(&disk, size, bd->bd_sectorsize) == 0) {
754		(void) disk_ioctl(&disk, DIOCGMEDIASIZE, &size);
755		disk_close(&disk);
756	}
757	return (size / bd->bd_sectorsize);
758}
759
760/*
761 * Attempt to open the disk described by (dev) for use by (f).
762 *
763 * Note that the philosophy here is "give them exactly what
764 * they ask for".  This is necessary because being too "smart"
765 * about what the user might want leads to complications.
766 * (eg. given no slice or partition value, with a disk that is
767 *  sliced - are they after the first BSD slice, or the DOS
768 *  slice before it?)
769 */
770static int
771bd_open(struct open_file *f, ...)
772{
773	bdinfo_t *bd;
774	struct disk_devdesc *dev;
775	va_list ap;
776	int rc;
777
778	va_start(ap, f);
779	dev = va_arg(ap, struct disk_devdesc *);
780	va_end(ap);
781
782	bd = bd_get_bdinfo(&dev->dd);
783	if (bd == NULL)
784		return (EIO);
785
786	if ((bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) {
787		if (!bd_int13probe(bd))
788			return (EIO);
789		if ((bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
790			return (EIO);
791	}
792	if (bd->bd_bcache == NULL)
793	    bd->bd_bcache = bcache_allocate();
794
795	if (bd->bd_open == 0)
796		bd->bd_sectors = bd_disk_get_sectors(dev);
797	bd->bd_open++;
798
799	rc = 0;
800	if (dev->dd.d_dev->dv_type == DEVT_DISK) {
801		rc = disk_open(dev, bd->bd_sectors * bd->bd_sectorsize,
802		    bd->bd_sectorsize);
803		if (rc != 0) {
804			bd->bd_open--;
805			if (bd->bd_open == 0) {
806				bcache_free(bd->bd_bcache);
807				bd->bd_bcache = NULL;
808			}
809		}
810	}
811	return (rc);
812}
813
814static int
815bd_close(struct open_file *f)
816{
817	struct disk_devdesc *dev;
818	bdinfo_t *bd;
819	int rc = 0;
820
821	dev = (struct disk_devdesc *)f->f_devdata;
822	bd = bd_get_bdinfo(&dev->dd);
823	if (bd == NULL)
824		return (EIO);
825
826	bd->bd_open--;
827	if (bd->bd_open == 0) {
828	    bcache_free(bd->bd_bcache);
829	    bd->bd_bcache = NULL;
830	}
831	if (dev->dd.d_dev->dv_type == DEVT_DISK)
832		rc = disk_close(dev);
833	return (rc);
834}
835
836static int
837bd_ioctl(struct open_file *f, u_long cmd, void *data)
838{
839	bdinfo_t *bd;
840	struct disk_devdesc *dev;
841	int rc;
842
843	dev = (struct disk_devdesc *)f->f_devdata;
844	bd = bd_get_bdinfo(&dev->dd);
845	if (bd == NULL)
846		return (EIO);
847
848	if (dev->dd.d_dev->dv_type == DEVT_DISK) {
849		rc = disk_ioctl(dev, cmd, data);
850		if (rc != ENOTTY)
851			return (rc);
852	}
853
854	switch (cmd) {
855	case DIOCGSECTORSIZE:
856		*(uint32_t *)data = bd->bd_sectorsize;
857		break;
858	case DIOCGMEDIASIZE:
859		*(uint64_t *)data = bd->bd_sectors * bd->bd_sectorsize;
860		break;
861	default:
862		return (ENOTTY);
863	}
864	return (0);
865}
866
867static int
868bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
869    char *buf, size_t *rsize)
870{
871	bdinfo_t *bd;
872	struct bcache_devdata bcd;
873	struct disk_devdesc *dev;
874	daddr_t offset;
875
876	dev = (struct disk_devdesc *)devdata;
877	bd = bd_get_bdinfo(&dev->dd);
878	if (bd == NULL)
879		return (EINVAL);
880
881	bcd.dv_strategy = bd_realstrategy;
882	bcd.dv_devdata = devdata;
883	bcd.dv_cache = bd->bd_bcache;
884
885	offset = 0;
886	if (dev->dd.d_dev->dv_type == DEVT_DISK) {
887
888		offset = dev->d_offset * bd->bd_sectorsize;
889		offset /= BIOSDISK_SECSIZE;
890	}
891	return (bcache_strategy(&bcd, rw, dblk + offset, size,
892	    buf, rsize));
893}
894
895static int
896bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
897    char *buf, size_t *rsize)
898{
899	struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
900	bdinfo_t *bd;
901	uint64_t disk_blocks, offset, d_offset;
902	size_t blks, blkoff, bsize, bio_size, rest;
903	caddr_t bbuf = NULL;
904	int rc;
905
906	bd = bd_get_bdinfo(&dev->dd);
907	if (bd == NULL || (bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
908		return (EIO);
909
910	/*
911	 * First make sure the IO size is a multiple of 512 bytes. While we do
912	 * process partial reads below, the strategy mechanism is built
913	 * assuming IO is a multiple of 512B blocks. If the request is not
914	 * a multiple of 512B blocks, it has to be some sort of bug.
915	 */
916	if (size == 0 || (size % BIOSDISK_SECSIZE) != 0) {
917		printf("bd_strategy: %d bytes I/O not multiple of %d\n",
918		    size, BIOSDISK_SECSIZE);
919		return (EIO);
920	}
921
922	DEBUG("open_disk %p", dev);
923
924	offset = dblk * BIOSDISK_SECSIZE;
925	dblk = offset / bd->bd_sectorsize;
926	blkoff = offset % bd->bd_sectorsize;
927
928	/*
929	 * Check the value of the size argument. We do have quite small
930	 * heap (64MB), but we do not know good upper limit, so we check against
931	 * INT_MAX here. This will also protect us against possible overflows
932	 * while translating block count to bytes.
933	 */
934	if (size > INT_MAX) {
935		DEBUG("too large I/O: %zu bytes", size);
936		return (EIO);
937	}
938
939	blks = size / bd->bd_sectorsize;
940	if (blks == 0 || (size % bd->bd_sectorsize) != 0)
941		blks++;
942
943	if (dblk > dblk + blks)
944		return (EIO);
945
946	if (rsize)
947		*rsize = 0;
948
949	/*
950	 * Get disk blocks, this value is either for whole disk or for
951	 * partition.
952	 */
953	d_offset = 0;
954	disk_blocks = 0;
955	if (dev->dd.d_dev->dv_type == DEVT_DISK) {
956		if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
957			/* DIOCGMEDIASIZE does return bytes. */
958			disk_blocks /= bd->bd_sectorsize;
959		}
960		d_offset = dev->d_offset;
961	}
962	if (disk_blocks == 0)
963		disk_blocks = bd->bd_sectors - d_offset;
964
965	/* Validate source block address. */
966	if (dblk < d_offset || dblk >= d_offset + disk_blocks)
967		return (EIO);
968
969	/*
970	 * Truncate if we are crossing disk or partition end.
971	 */
972	if (dblk + blks >= d_offset + disk_blocks) {
973		blks = d_offset + disk_blocks - dblk;
974		size = blks * bd->bd_sectorsize;
975		DEBUG("short I/O %d", blks);
976	}
977
978	bio_size = min(BIO_BUFFER_SIZE, size);
979	while (bio_size > bd->bd_sectorsize) {
980		bbuf = bio_alloc(bio_size);
981		if (bbuf != NULL)
982			break;
983		bio_size -= bd->bd_sectorsize;
984	}
985	if (bbuf == NULL) {
986		bio_size = V86_IO_BUFFER_SIZE;
987		if (bio_size / bd->bd_sectorsize == 0)
988			panic("BUG: Real mode buffer is too small");
989
990		/* Use alternate 4k buffer */
991		bbuf = PTOV(V86_IO_BUFFER);
992	}
993	rest = size;
994	rc = 0;
995	while (blks > 0) {
996		int x = min(blks, bio_size / bd->bd_sectorsize);
997
998		switch (rw & F_MASK) {
999		case F_READ:
1000			DEBUG("read %d from %lld to %p", x, dblk, buf);
1001			bsize = bd->bd_sectorsize * x - blkoff;
1002			if (rest < bsize)
1003				bsize = rest;
1004
1005			if ((rc = bd_io(dev, bd, dblk, x, bbuf, BD_RD)) != 0) {
1006				rc = EIO;
1007				goto error;
1008			}
1009
1010			bcopy(bbuf + blkoff, buf, bsize);
1011			break;
1012		case F_WRITE :
1013			DEBUG("write %d from %lld to %p", x, dblk, buf);
1014			if (blkoff != 0) {
1015				/*
1016				 * We got offset to sector, read 1 sector to
1017				 * bbuf.
1018				 */
1019				x = 1;
1020				bsize = bd->bd_sectorsize - blkoff;
1021				bsize = min(bsize, rest);
1022				rc = bd_io(dev, bd, dblk, x, bbuf, BD_RD);
1023			} else if (rest < bd->bd_sectorsize) {
1024				/*
1025				 * The remaining block is not full
1026				 * sector. Read 1 sector to bbuf.
1027				 */
1028				x = 1;
1029				bsize = rest;
1030				rc = bd_io(dev, bd, dblk, x, bbuf, BD_RD);
1031			} else {
1032				/* We can write full sector(s). */
1033				bsize = bd->bd_sectorsize * x;
1034			}
1035			/*
1036			 * Put your Data In, Put your Data out,
1037			 * Put your Data In, and shake it all about
1038			 */
1039			bcopy(buf, bbuf + blkoff, bsize);
1040			if ((rc = bd_io(dev, bd, dblk, x, bbuf, BD_WR)) != 0) {
1041				rc = EIO;
1042				goto error;
1043			}
1044
1045			break;
1046		default:
1047			/* DO NOTHING */
1048			rc = EROFS;
1049			goto error;
1050		}
1051
1052		blkoff = 0;
1053		buf += bsize;
1054		rest -= bsize;
1055		blks -= x;
1056		dblk += x;
1057	}
1058
1059	if (rsize != NULL)
1060		*rsize = size;
1061error:
1062	if (bbuf != PTOV(V86_IO_BUFFER))
1063		bio_free(bbuf, bio_size);
1064	return (rc);
1065}
1066
1067static int
1068bd_edd_io(bdinfo_t *bd, daddr_t dblk, int blks, caddr_t dest,
1069    int dowrite)
1070{
1071	static struct edd_packet packet;
1072
1073	packet.len = sizeof(struct edd_packet);
1074	packet.count = blks;
1075	packet.off = VTOPOFF(dest);
1076	packet.seg = VTOPSEG(dest);
1077	packet.lba = dblk;
1078	v86.ctl = V86_FLAGS;
1079	v86.addr = 0x13;
1080	/* Should we Write with verify ?? 0x4302 ? */
1081	if (dowrite == BD_WR)
1082		v86.eax = 0x4300;
1083	else
1084		v86.eax = 0x4200;
1085	v86.edx = bd->bd_unit;
1086	v86.ds = VTOPSEG(&packet);
1087	v86.esi = VTOPOFF(&packet);
1088	v86int();
1089	if (V86_CY(v86.efl))
1090		return (v86.eax >> 8);
1091	return (0);
1092}
1093
1094static int
1095bd_chs_io(bdinfo_t *bd, daddr_t dblk, int blks, caddr_t dest,
1096    int dowrite)
1097{
1098	uint32_t x, bpc, cyl, hd, sec;
1099
1100	bpc = bd->bd_sec * bd->bd_hds;	/* blocks per cylinder */
1101	x = dblk;
1102	cyl = x / bpc;			/* block # / blocks per cylinder */
1103	x %= bpc;				/* block offset into cylinder */
1104	hd = x / bd->bd_sec;		/* offset / blocks per track */
1105	sec = x % bd->bd_sec;		/* offset into track */
1106
1107	/* correct sector number for 1-based BIOS numbering */
1108	sec++;
1109
1110	if (cyl > 1023) {
1111		/* CHS doesn't support cylinders > 1023. */
1112		return (1);
1113	}
1114
1115	v86.ctl = V86_FLAGS;
1116	v86.addr = 0x13;
1117	if (dowrite == BD_WR)
1118		v86.eax = 0x300 | blks;
1119	else
1120		v86.eax = 0x200 | blks;
1121	v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec;
1122	v86.edx = (hd << 8) | bd->bd_unit;
1123	v86.es = VTOPSEG(dest);
1124	v86.ebx = VTOPOFF(dest);
1125	v86int();
1126	if (V86_CY(v86.efl))
1127		return (v86.eax >> 8);
1128	return (0);
1129}
1130
1131static void
1132bd_io_workaround(bdinfo_t *bd)
1133{
1134	uint8_t buf[8 * 1024];
1135
1136	bd_edd_io(bd, 0xffffffff, 1, (caddr_t)buf, BD_RD);
1137}
1138
1139static int
1140bd_io(struct disk_devdesc *dev, bdinfo_t *bd, daddr_t dblk, int blks,
1141    caddr_t dest, int dowrite)
1142{
1143	int result, retry;
1144
1145	/* Just in case some idiot actually tries to read/write -1 blocks... */
1146	if (blks < 0)
1147		return (-1);
1148
1149	/*
1150	 * Workaround for a problem with some HP ProLiant BIOS failing to work
1151	 * out the boot disk after installation. hrs and kuriyama discovered
1152	 * this problem with an HP ProLiant DL320e Gen 8 with a 3TB HDD, and
1153	 * discovered that an int13h call seems to cause a buffer overrun in
1154	 * the bios. The problem is alleviated by doing an extra read before
1155	 * the buggy read. It is not immediately known whether other models
1156	 * are similarly affected.
1157	 * Loop retrying the operation a couple of times.  The BIOS
1158	 * may also retry.
1159	 */
1160	if (dowrite == BD_RD && dblk >= 0x100000000)
1161		bd_io_workaround(bd);
1162	for (retry = 0; retry < 3; retry++) {
1163		if (bd->bd_flags & BD_MODEEDD)
1164			result = bd_edd_io(bd, dblk, blks, dest, dowrite);
1165		else
1166			result = bd_chs_io(bd, dblk, blks, dest, dowrite);
1167
1168		if (result == 0) {
1169			if (bd->bd_flags & BD_NO_MEDIA)
1170				bd->bd_flags &= ~BD_NO_MEDIA;
1171			break;
1172		}
1173
1174		bd_reset_disk(bd->bd_unit);
1175
1176		/*
1177		 * Error codes:
1178		 * 20h	controller failure
1179		 * 31h	no media in drive (IBM/MS INT 13 extensions)
1180		 * 80h	no media in drive, VMWare (Fusion)
1181		 * There is no reason to repeat the IO with errors above.
1182		 */
1183		if (result == 0x20 || result == 0x31 || result == 0x80) {
1184			bd->bd_flags |= BD_NO_MEDIA;
1185			break;
1186		}
1187	}
1188
1189	if (result != 0 && (bd->bd_flags & BD_NO_MEDIA) == 0) {
1190		if (dowrite == BD_WR) {
1191			printf("%s%d: Write %d sector(s) from %p (0x%x) "
1192			    "to %lld: 0x%x\n", dev->dd.d_dev->dv_name,
1193			    dev->dd.d_unit, blks, dest, VTOP(dest), dblk,
1194			    result);
1195		} else {
1196			printf("%s%d: Read %d sector(s) from %lld to %p "
1197			    "(0x%x): 0x%x\n", dev->dd.d_dev->dv_name,
1198			    dev->dd.d_unit, blks, dblk, dest, VTOP(dest),
1199			    result);
1200		}
1201	}
1202
1203	return (result);
1204}
1205
1206/*
1207 * Return the BIOS geometry of a given "fixed drive" in a format
1208 * suitable for the legacy bootinfo structure.  Since the kernel is
1209 * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we
1210 * prefer to get the information directly, rather than rely on being
1211 * able to put it together from information already maintained for
1212 * different purposes and for a probably different number of drives.
1213 *
1214 * For valid drives, the geometry is expected in the format (31..0)
1215 * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are
1216 * indicated by returning the geometry of a "1.2M" PC-format floppy
1217 * disk.  And, incidentally, what is returned is not the geometry as
1218 * such but the highest valid cylinder, head, and sector numbers.
1219 */
1220uint32_t
1221bd_getbigeom(int bunit)
1222{
1223
1224	v86.ctl = V86_FLAGS;
1225	v86.addr = 0x13;
1226	v86.eax = 0x800;
1227	v86.edx = 0x80 + bunit;
1228	v86int();
1229	if (V86_CY(v86.efl))
1230		return (0x4f010f);
1231	return (((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) |
1232	    (v86.edx & 0xff00) | (v86.ecx & 0x3f));
1233}
1234
1235/*
1236 * Return a suitable dev_t value for (dev).
1237 *
1238 * In the case where it looks like (dev) is a SCSI disk, we allow the number of
1239 * IDE disks to be specified in $num_ide_disks.  There should be a Better Way.
1240 */
1241int
1242bd_getdev(struct i386_devdesc *d)
1243{
1244	struct disk_devdesc *dev;
1245	bdinfo_t *bd;
1246	int	biosdev;
1247	int	major;
1248	int	rootdev;
1249	char	*nip, *cp;
1250	int	i, unit, slice, partition;
1251
1252	/* XXX: Assume partition 'a'. */
1253	slice = 0;
1254	partition = 0;
1255
1256	dev = (struct disk_devdesc *)d;
1257	bd = bd_get_bdinfo(&dev->dd);
1258	if (bd == NULL)
1259		return (-1);
1260
1261	biosdev = bd_unit2bios(d);
1262	DEBUG("unit %d BIOS device %d", dev->dd.d_unit, biosdev);
1263	if (biosdev == -1)			/* not a BIOS device */
1264		return (-1);
1265
1266	if (dev->dd.d_dev->dv_type == DEVT_DISK) {
1267		if (disk_open(dev, bd->bd_sectors * bd->bd_sectorsize,
1268		    bd->bd_sectorsize) != 0)	/* oops, not a viable device */
1269			return (-1);
1270		else
1271			disk_close(dev);
1272		slice = dev->d_slice + 1;
1273		partition = dev->d_partition;
1274	}
1275
1276	if (biosdev < 0x80) {
1277		/* floppy (or emulated floppy) or ATAPI device */
1278		if (bd->bd_type == DT_ATAPI) {
1279			/* is an ATAPI disk */
1280			major = WFDMAJOR;
1281		} else {
1282			/* is a floppy disk */
1283			major = FDMAJOR;
1284		}
1285	} else {
1286		/* assume an IDE disk */
1287		major = WDMAJOR;
1288	}
1289	/* default root disk unit number */
1290	unit = biosdev & 0x7f;
1291
1292	if (dev->dd.d_dev->dv_type == DEVT_CD) {
1293		/*
1294		 * XXX: Need to examine device spec here to figure out if
1295		 * SCSI or ATAPI.  No idea on how to figure out device number.
1296		 * All we can really pass to the kernel is what bus and device
1297		 * on which bus we were booted from, which dev_t isn't well
1298		 * suited to since those number don't match to unit numbers
1299		 * very well.  We may just need to engage in a hack where
1300		 * we pass -C to the boot args if we are the boot device.
1301		 */
1302		major = ACDMAJOR;
1303		unit = 0;       /* XXX */
1304	}
1305
1306	/* XXX a better kludge to set the root disk unit number */
1307	if ((nip = getenv("root_disk_unit")) != NULL) {
1308		i = strtol(nip, &cp, 0);
1309		/* check for parse error */
1310		if ((cp != nip) && (*cp == 0))
1311			unit = i;
1312	}
1313
1314	rootdev = MAKEBOOTDEV(major, slice, unit, partition);
1315	DEBUG("dev is 0x%x\n", rootdev);
1316	return (rootdev);
1317}
1318