pci_ahci.c revision 256281
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 256164 2013-10-08 19:39:21Z dim $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 256164 2013-10-08 19:39:21Z dim $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <inttypes.h>
52
53#include "bhyverun.h"
54#include "pci_emul.h"
55#include "ahci.h"
56#include "block_if.h"
57
/* Number of ports in the emulated controller (Intel ICH8 AHCI supports 6). */
#define	MAX_PORTS	6

/* Values stored in a port's `sig' field, selected by the device type. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
62
/*
 * SATA FIS type codes.  The value is the first byte of a FIS; this file
 * checks it on incoming command FISes and writes it into the FISes it
 * builds for the guest.
 */
enum sata_fis_type {
	/* Register FIS - host to device */
	FIS_TYPE_REGH2D		= 0x27,
	/* Register FIS - device to host */
	FIS_TYPE_REGD2H		= 0x34,
	/* DMA activate FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,
	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,
	/* Data FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,
	/* BIST activate FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,
	/* PIO setup FIS - device to host */
	FIS_TYPE_PIOSETUP	= 0x5F,
	/* Set dev bits FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,
};
73
/*
 * SCSI opcodes
 *
 * Compared against the first byte of the ATAPI command packet by the
 * packet command dispatcher.
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	READ_12				0xA8
#define	READ_CD				0xBE
90
/*
 * SCSI mode page codes
 *
 * The page codes the MODE SENSE(10) handler knows how to answer.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A
96
/*
 * Debug printf
 *
 * DPRINTF writes to the `dbg' stream and flushes it, but only when the
 * file is compiled with AHCI_DEBUG; otherwise it expands to nothing.
 * WPRINTF always prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, arg...)					\
	do {							\
		fprintf(dbg, format, ##arg);			\
		fflush(dbg);					\
	} while (0)
#else
#define	DPRINTF(format, arg...)
#endif
#define	WPRINTF(format, arg...)	printf(format, ##arg)
107
108struct ahci_ioreq {
109	struct blockif_req io_req;
110	struct ahci_port *io_pr;
111	STAILQ_ENTRY(ahci_ioreq) io_list;
112	uint8_t *cfis;
113	uint32_t len;
114	uint32_t done;
115	int slot;
116	int prdtl;
117};
118
/*
 * State of one emulated AHCI port.
 *
 * NOTE(review): the 32-bit fields from `clb' through `fbs' look like they
 * mirror the AHCI per-port register block in order (including the
 * `unused0' placeholder) -- confirm before reordering or inserting fields.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL = no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list (array of headers) */
	uint8_t *rfis;			/* received-FIS area */
	int atapi;			/* non-zero for an ATAPI device */
	int reset;			/* set while a soft reset is pending */
	int mult_sectors;		/* SET MULTIPLE sector count */
	uint8_t xfermode;		/* current transfer mode */
	uint8_t sense_key;		/* last ATAPI sense key */
	uint8_t asc;			/* last ATAPI additional sense code */

	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;			/* interrupt status */
	uint32_t ie;			/* interrupt enable */
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;			/* task file: error << 8 | status */
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;			/* one bit per outstanding slot */
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
};
156
/*
 * One command header in a port's command list; the list is indexed by slot
 * with a stride of AHCI_CL_SIZE bytes.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
	uint16_t prdtl;		/* number of PRDT entries */
	uint32_t prdbc;		/* byte count transferred, written back */
	uint64_t ctba;		/* guest address of the command table */
	uint32_t reserved[4];
};
164
/*
 * One physical region descriptor; the table starts 0x80 bytes into the
 * command table.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest address of the data buffer */
	uint32_t reserved;
	uint32_t dbc;		/* buffer byte count minus one */
};
170
171struct pci_ahci_softc {
172	struct pci_devinst *asc_pi;
173	pthread_mutex_t	mtx;
174	int ports;
175	uint32_t cap;
176	uint32_t ghc;
177	uint32_t is;
178	uint32_t pi;
179	uint32_t vs;
180	uint32_t ccc_ctl;
181	uint32_t ccc_pts;
182	uint32_t em_loc;
183	uint32_t em_ctl;
184	uint32_t cap2;
185	uint32_t bohc;
186	struct ahci_port port[MAX_PORTS];
187};
188#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
189
/*
 * Convert a logical block address into a three-byte minute/second/frame
 * triple stored in buf[0..2].  The address is biased by 150 frames first;
 * a second holds 75 frames and a minute 60 seconds.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
197
198/*
199 * generate HBA intr depending on whether or not ports within
200 * the controller have an interrupt pending.
201 */
202static void
203ahci_generate_intr(struct pci_ahci_softc *sc)
204{
205	int i;
206
207	for (i = 0; i < sc->ports; i++) {
208		struct ahci_port *pr;
209		pr = &sc->port[i];
210		if (pr->is & pr->ie)
211			sc->is |= (1 << i);
212	}
213
214	DPRINTF("%s %x\n", __func__, sc->is);
215
216	if (sc->is && (sc->ghc & AHCI_GHC_IE))
217		pci_generate_msi(sc->asc_pi, 0);
218}
219
220static void
221ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
222{
223	int offset, len, irq;
224
225	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
226		return;
227
228	switch (ft) {
229	case FIS_TYPE_REGD2H:
230		offset = 0x40;
231		len = 20;
232		irq = AHCI_P_IX_DHR;
233		break;
234	case FIS_TYPE_SETDEVBITS:
235		offset = 0x58;
236		len = 8;
237		irq = AHCI_P_IX_SDB;
238		break;
239	case FIS_TYPE_PIOSETUP:
240		offset = 0x20;
241		len = 20;
242		irq = 0;
243		break;
244	default:
245		WPRINTF("unsupported fis type %d\n", ft);
246		return;
247	}
248	memcpy(p->rfis + offset, fis, len);
249	if (irq) {
250		p->is |= irq;
251		ahci_generate_intr(p->pr_sc);
252	}
253}
254
255static void
256ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
257{
258	uint8_t fis[8];
259	uint8_t error;
260
261	error = (tfd >> 8) & 0xff;
262	memset(fis, 0, sizeof(fis));
263	fis[0] = error;
264	fis[2] = tfd & 0x77;
265	*(uint32_t *)(fis + 4) = (1 << slot);
266	if (fis[2] & ATA_S_ERROR)
267		p->is |= AHCI_P_IX_TFE;
268	p->tfd = tfd;
269	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
270}
271
272static void
273ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
274{
275	uint8_t fis[20];
276	uint8_t error;
277
278	error = (tfd >> 8) & 0xff;
279	memset(fis, 0, sizeof(fis));
280	fis[0] = FIS_TYPE_REGD2H;
281	fis[1] = (1 << 6);
282	fis[2] = tfd & 0xff;
283	fis[3] = error;
284	fis[4] = cfis[4];
285	fis[5] = cfis[5];
286	fis[6] = cfis[6];
287	fis[7] = cfis[7];
288	fis[8] = cfis[8];
289	fis[9] = cfis[9];
290	fis[10] = cfis[10];
291	fis[11] = cfis[11];
292	fis[12] = cfis[12];
293	fis[13] = cfis[13];
294	if (fis[2] & ATA_S_ERROR)
295		p->is |= AHCI_P_IX_TFE;
296	p->tfd = tfd;
297	p->ci &= ~(1 << slot);
298	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
299}
300
301static void
302ahci_write_reset_fis_d2h(struct ahci_port *p)
303{
304	uint8_t fis[20];
305
306	memset(fis, 0, sizeof(fis));
307	fis[0] = FIS_TYPE_REGD2H;
308	fis[3] = 1;
309	fis[4] = 1;
310	if (p->atapi) {
311		fis[5] = 0x14;
312		fis[6] = 0xeb;
313	}
314	fis[12] = 1;
315	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
316}
317
318static void
319ahci_port_reset(struct ahci_port *pr)
320{
321	pr->sctl = 0;
322	pr->serr = 0;
323	pr->sact = 0;
324	pr->xfermode = ATA_UDMA6;
325	pr->mult_sectors = 128;
326
327	if (!pr->bctx) {
328		pr->ssts = ATA_SS_DET_NO_DEVICE;
329		pr->sig = 0xFFFFFFFF;
330		pr->tfd = 0x7F;
331		return;
332	}
333	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
334		ATA_SS_IPM_ACTIVE;
335	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
336	if (!pr->atapi) {
337		pr->sig = PxSIG_ATA;
338		pr->tfd |= ATA_S_READY;
339	} else
340		pr->sig = PxSIG_ATAPI;
341	ahci_write_reset_fis_d2h(pr);
342}
343
344static void
345ahci_reset(struct pci_ahci_softc *sc)
346{
347	int i;
348
349	sc->ghc = AHCI_GHC_AE;
350	sc->is = 0;
351	for (i = 0; i < sc->ports; i++) {
352		sc->port[i].ie = 0;
353		sc->port[i].is = 0;
354		ahci_port_reset(&sc->port[i]);
355	}
356}
357
/*
 * Copy src into the len-byte field dest, padding with spaces once src is
 * exhausted.  Each pair of bytes is stored swapped (index ^ 1), so len is
 * expected to be even.  No terminating NUL is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
370
/*
 * Copy src into the len-byte field dest in order, padding with spaces once
 * src is exhausted.  No terminating NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src == '\0')
			*out = ' ';
		else
			*out = *src++;
	}
}
383
384static void
385ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
386    int seek)
387{
388	struct ahci_ioreq *aior;
389	struct blockif_req *breq;
390	struct pci_ahci_softc *sc;
391	struct ahci_prdt_entry *prdt;
392	struct ahci_cmd_hdr *hdr;
393	uint64_t lba;
394	uint32_t len;
395	int i, err, iovcnt, ncq, readop;
396
397	sc = p->pr_sc;
398	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
399	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
400	ncq = 0;
401	readop = 1;
402
403	prdt += seek;
404	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
405			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
406		readop = 0;
407
408	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
409			cfis[2] == ATA_READ_FPDMA_QUEUED) {
410		lba = ((uint64_t)cfis[10] << 40) |
411			((uint64_t)cfis[9] << 32) |
412			((uint64_t)cfis[8] << 24) |
413			((uint64_t)cfis[6] << 16) |
414			((uint64_t)cfis[5] << 8) |
415			cfis[4];
416		len = cfis[11] << 8 | cfis[3];
417		if (!len)
418			len = 65536;
419		ncq = 1;
420	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
421		lba = ((uint64_t)cfis[10] << 40) |
422			((uint64_t)cfis[9] << 32) |
423			((uint64_t)cfis[8] << 24) |
424			((uint64_t)cfis[6] << 16) |
425			((uint64_t)cfis[5] << 8) |
426			cfis[4];
427		len = cfis[13] << 8 | cfis[12];
428		if (!len)
429			len = 65536;
430	} else {
431		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
432			(cfis[5] << 8) | cfis[4];
433		len = cfis[12];
434		if (!len)
435			len = 256;
436	}
437	lba *= blockif_sectsz(p->bctx);
438	len *= blockif_sectsz(p->bctx);
439
440	/*
441	 * Pull request off free list
442	 */
443	aior = STAILQ_FIRST(&p->iofhd);
444	assert(aior != NULL);
445	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
446	aior->cfis = cfis;
447	aior->slot = slot;
448	aior->len = len;
449	aior->done = done;
450	breq = &aior->io_req;
451	breq->br_offset = lba + done;
452	iovcnt = hdr->prdtl - seek;
453	if (iovcnt > BLOCKIF_IOV_MAX) {
454		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
455		iovcnt = BLOCKIF_IOV_MAX;
456	} else
457		aior->prdtl = 0;
458	breq->br_iovcnt = iovcnt;
459
460	/*
461	 * Build up the iovec based on the prdt
462	 */
463	for (i = 0; i < iovcnt; i++) {
464		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
465				prdt->dba, prdt->dbc + 1);
466		breq->br_iov[i].iov_len = prdt->dbc + 1;
467		aior->done += (prdt->dbc + 1);
468		prdt++;
469	}
470	if (readop)
471		err = blockif_read(p->bctx, breq);
472	else
473		err = blockif_write(p->bctx, breq);
474	assert(err == 0);
475
476	if (!aior->prdtl && ncq)
477		p->ci &= ~(1 << slot);
478}
479
480static void
481ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
482{
483	struct ahci_ioreq *aior;
484	struct blockif_req *breq;
485	int err;
486
487	/*
488	 * Pull request off free list
489	 */
490	aior = STAILQ_FIRST(&p->iofhd);
491	assert(aior != NULL);
492	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
493	aior->cfis = cfis;
494	aior->slot = slot;
495	aior->len = 0;
496	breq = &aior->io_req;
497
498	err = blockif_flush(p->bctx, breq);
499	assert(err == 0);
500}
501
502static inline void
503write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
504		void *buf, int size)
505{
506	struct ahci_cmd_hdr *hdr;
507	struct ahci_prdt_entry *prdt;
508	void *from;
509	int i, len;
510
511	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
512	len = size;
513	from = buf;
514	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
515	for (i = 0; i < hdr->prdtl && len; i++) {
516		uint8_t *ptr = paddr_guest2host(ahci_ctx(p->pr_sc),
517				prdt->dba, prdt->dbc + 1);
518		memcpy(ptr, from, prdt->dbc + 1);
519		len -= (prdt->dbc + 1);
520		from += (prdt->dbc + 1);
521		prdt++;
522	}
523	hdr->prdbc = size - len;
524}
525
526static void
527handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
528{
529	struct ahci_cmd_hdr *hdr;
530
531	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
532	if (p->atapi || hdr->prdtl == 0) {
533		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
534		p->is |= AHCI_P_IX_TFE;
535	} else {
536		uint16_t buf[256];
537		uint64_t sectors;
538
539		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
540		memset(buf, 0, sizeof(buf));
541		buf[0] = 0x0040;
542		/* TODO emulate different serial? */
543		ata_string((uint8_t *)(buf+10), "123456", 20);
544		ata_string((uint8_t *)(buf+23), "001", 8);
545		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
546		buf[47] = (0x8000 | 128);
547		buf[48] = 0x1;
548		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
549		buf[50] = (1 << 14);
550		buf[53] = (1 << 1 | 1 << 2);
551		if (p->mult_sectors)
552			buf[59] = (0x100 | p->mult_sectors);
553		buf[60] = sectors;
554		buf[61] = (sectors >> 16);
555		buf[63] = 0x7;
556		if (p->xfermode & ATA_WDMA0)
557			buf[63] |= (1 << ((p->xfermode & 7) + 8));
558		buf[64] = 0x3;
559		buf[65] = 100;
560		buf[66] = 100;
561		buf[67] = 100;
562		buf[68] = 100;
563		buf[75] = 31;
564		buf[76] = (1 << 8 | 1 << 2);
565		buf[80] = 0x1f0;
566		buf[81] = 0x28;
567		buf[82] = (1 << 5 | 1 << 14);
568		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
569		buf[84] = (1 << 14);
570		buf[85] = (1 << 5 | 1 << 14);
571		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
572		buf[87] = (1 << 14);
573		buf[88] = 0x7f;
574		if (p->xfermode & ATA_UDMA0)
575			buf[88] |= (1 << ((p->xfermode & 7) + 8));
576		buf[93] = (1 | 1 <<14);
577		buf[100] = sectors;
578		buf[101] = (sectors >> 16);
579		buf[102] = (sectors >> 32);
580		buf[103] = (sectors >> 48);
581		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
582		p->tfd = ATA_S_DSC | ATA_S_READY;
583		p->is |= AHCI_P_IX_DP;
584	}
585	p->ci &= ~(1 << slot);
586	ahci_generate_intr(p->pr_sc);
587}
588
589static void
590handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
591{
592	if (!p->atapi) {
593		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
594		p->is |= AHCI_P_IX_TFE;
595	} else {
596		uint16_t buf[256];
597
598		memset(buf, 0, sizeof(buf));
599		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
600		/* TODO emulate different serial? */
601		ata_string((uint8_t *)(buf+10), "123456", 20);
602		ata_string((uint8_t *)(buf+23), "001", 8);
603		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
604		buf[49] = (1 << 9 | 1 << 8);
605		buf[50] = (1 << 14 | 1);
606		buf[53] = (1 << 2 | 1 << 1);
607		buf[62] = 0x3f;
608		buf[63] = 7;
609		buf[64] = 3;
610		buf[65] = 100;
611		buf[66] = 100;
612		buf[67] = 100;
613		buf[68] = 100;
614		buf[76] = (1 << 2 | 1 << 1);
615		buf[78] = (1 << 5);
616		buf[80] = (0x1f << 4);
617		buf[82] = (1 << 4);
618		buf[83] = (1 << 14);
619		buf[84] = (1 << 14);
620		buf[85] = (1 << 4);
621		buf[87] = (1 << 14);
622		buf[88] = (1 << 14 | 0x7f);
623		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
624		p->tfd = ATA_S_DSC | ATA_S_READY;
625		p->is |= AHCI_P_IX_DHR;
626	}
627	p->ci &= ~(1 << slot);
628	ahci_generate_intr(p->pr_sc);
629}
630
631static void
632atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
633{
634	uint8_t buf[36];
635	uint8_t *acmd;
636	int len;
637
638	acmd = cfis + 0x40;
639
640	buf[0] = 0x05;
641	buf[1] = 0x80;
642	buf[2] = 0x00;
643	buf[3] = 0x21;
644	buf[4] = 31;
645	buf[5] = 0;
646	buf[6] = 0;
647	buf[7] = 0;
648	atapi_string(buf + 8, "BHYVE", 8);
649	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
650	atapi_string(buf + 32, "001", 4);
651
652	len = sizeof(buf);
653	if (len > acmd[4])
654		len = acmd[4];
655	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
656	write_prdt(p, slot, cfis, buf, len);
657	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
658}
659
660static void
661atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
662{
663	uint8_t buf[8];
664	uint64_t sectors;
665
666	sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
667	sectors >>= 2;
668	be32enc(buf, sectors - 1);
669	be32enc(buf + 4, 2048);
670	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
671	write_prdt(p, slot, cfis, buf, sizeof(buf));
672	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
673}
674
675static void
676atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
677{
678	uint8_t *acmd;
679	uint8_t format;
680	int len;
681
682	acmd = cfis + 0x40;
683
684	len = be16dec(acmd + 7);
685	format = acmd[9] >> 6;
686	switch (format) {
687	case 0:
688	{
689		int msf, size;
690		uint64_t sectors;
691		uint8_t start_track, buf[20], *bp;
692
693		msf = (acmd[1] >> 1) & 1;
694		start_track = acmd[6];
695		if (start_track > 1 && start_track != 0xaa) {
696			uint32_t tfd;
697			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
698			p->asc = 0x24;
699			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
700			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
701			ahci_write_fis_d2h(p, slot, cfis, tfd);
702			return;
703		}
704		bp = buf + 2;
705		*bp++ = 1;
706		*bp++ = 1;
707		if (start_track <= 1) {
708			*bp++ = 0;
709			*bp++ = 0x14;
710			*bp++ = 1;
711			*bp++ = 0;
712			if (msf) {
713				*bp++ = 0;
714				lba_to_msf(bp, 0);
715				bp += 3;
716			} else {
717				*bp++ = 0;
718				*bp++ = 0;
719				*bp++ = 0;
720				*bp++ = 0;
721			}
722		}
723		*bp++ = 0;
724		*bp++ = 0x14;
725		*bp++ = 0xaa;
726		*bp++ = 0;
727		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
728		sectors >>= 2;
729		if (msf) {
730			*bp++ = 0;
731			lba_to_msf(bp, sectors);
732			bp += 3;
733		} else {
734			be32enc(bp, sectors);
735			bp += 4;
736		}
737		size = bp - buf;
738		be16enc(buf, size - 2);
739		if (len > size)
740			len = size;
741		write_prdt(p, slot, cfis, buf, len);
742		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
743		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
744		break;
745	}
746	case 1:
747	{
748		uint8_t buf[12];
749
750		memset(buf, 0, sizeof(buf));
751		buf[1] = 0xa;
752		buf[2] = 0x1;
753		buf[3] = 0x1;
754		if (len > sizeof(buf))
755			len = sizeof(buf);
756		write_prdt(p, slot, cfis, buf, len);
757		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
758		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
759		break;
760	}
761	case 2:
762	{
763		int msf, size;
764		uint64_t sectors;
765		uint8_t start_track, *bp, buf[50];
766
767		msf = (acmd[1] >> 1) & 1;
768		start_track = acmd[6];
769		bp = buf + 2;
770		*bp++ = 1;
771		*bp++ = 1;
772
773		*bp++ = 1;
774		*bp++ = 0x14;
775		*bp++ = 0;
776		*bp++ = 0xa0;
777		*bp++ = 0;
778		*bp++ = 0;
779		*bp++ = 0;
780		*bp++ = 0;
781		*bp++ = 1;
782		*bp++ = 0;
783		*bp++ = 0;
784
785		*bp++ = 1;
786		*bp++ = 0x14;
787		*bp++ = 0;
788		*bp++ = 0xa1;
789		*bp++ = 0;
790		*bp++ = 0;
791		*bp++ = 0;
792		*bp++ = 0;
793		*bp++ = 1;
794		*bp++ = 0;
795		*bp++ = 0;
796
797		*bp++ = 1;
798		*bp++ = 0x14;
799		*bp++ = 0;
800		*bp++ = 0xa2;
801		*bp++ = 0;
802		*bp++ = 0;
803		*bp++ = 0;
804		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
805		sectors >>= 2;
806		if (msf) {
807			*bp++ = 0;
808			lba_to_msf(bp, sectors);
809			bp += 3;
810		} else {
811			be32enc(bp, sectors);
812			bp += 4;
813		}
814
815		*bp++ = 1;
816		*bp++ = 0x14;
817		*bp++ = 0;
818		*bp++ = 1;
819		*bp++ = 0;
820		*bp++ = 0;
821		*bp++ = 0;
822		if (msf) {
823			*bp++ = 0;
824			lba_to_msf(bp, 0);
825			bp += 3;
826		} else {
827			*bp++ = 0;
828			*bp++ = 0;
829			*bp++ = 0;
830			*bp++ = 0;
831		}
832
833		size = bp - buf;
834		be16enc(buf, size - 2);
835		if (len > size)
836			len = size;
837		write_prdt(p, slot, cfis, buf, len);
838		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
839		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
840		break;
841	}
842	default:
843	{
844		uint32_t tfd;
845
846		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
847		p->asc = 0x24;
848		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
849		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
850		ahci_write_fis_d2h(p, slot, cfis, tfd);
851		break;
852	}
853	}
854}
855
856static void
857atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
858		uint32_t done, int seek)
859{
860	struct ahci_ioreq *aior;
861	struct ahci_cmd_hdr *hdr;
862	struct ahci_prdt_entry *prdt;
863	struct blockif_req *breq;
864	struct pci_ahci_softc *sc;
865	uint8_t *acmd;
866	uint64_t lba;
867	uint32_t len;
868	int i, err, iovcnt;
869
870	sc = p->pr_sc;
871	acmd = cfis + 0x40;
872	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
873	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
874
875	prdt += seek;
876	lba = be32dec(acmd + 2);
877	if (acmd[0] == READ_10)
878		len = be16dec(acmd + 7);
879	else
880		len = be32dec(acmd + 6);
881	if (len == 0) {
882		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
883		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
884	}
885	lba *= 2048;
886	len *= 2048;
887
888	/*
889	 * Pull request off free list
890	 */
891	aior = STAILQ_FIRST(&p->iofhd);
892	assert(aior != NULL);
893	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
894	aior->cfis = cfis;
895	aior->slot = slot;
896	aior->len = len;
897	aior->done = done;
898	breq = &aior->io_req;
899	breq->br_offset = lba + done;
900	iovcnt = hdr->prdtl - seek;
901	if (iovcnt > BLOCKIF_IOV_MAX) {
902		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
903		iovcnt = BLOCKIF_IOV_MAX;
904	} else
905		aior->prdtl = 0;
906	breq->br_iovcnt = iovcnt;
907
908	/*
909	 * Build up the iovec based on the prdt
910	 */
911	for (i = 0; i < hdr->prdtl; i++) {
912		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
913				prdt->dba, prdt->dbc + 1);
914		breq->br_iov[i].iov_len = prdt->dbc + 1;
915		aior->done += (prdt->dbc + 1);
916		prdt++;
917	}
918	err = blockif_read(p->bctx, breq);
919	assert(err == 0);
920}
921
922static void
923atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
924{
925	uint8_t buf[64];
926	uint8_t *acmd;
927	int len;
928
929	acmd = cfis + 0x40;
930	len = acmd[4];
931	if (len > sizeof(buf))
932		len = sizeof(buf);
933	memset(buf, 0, len);
934	buf[0] = 0x70 | (1 << 7);
935	buf[2] = p->sense_key;
936	buf[7] = 10;
937	buf[12] = p->asc;
938	write_prdt(p, slot, cfis, buf, len);
939	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
940	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
941}
942
943static void
944atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
945{
946	uint8_t *acmd = cfis + 0x40;
947	uint32_t tfd;
948
949	switch (acmd[4] & 3) {
950	case 0:
951	case 1:
952	case 3:
953		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
954		tfd = ATA_S_READY | ATA_S_DSC;
955		break;
956	case 2:
957		/* TODO eject media */
958		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
959		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
960		p->asc = 0x53;
961		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
962		break;
963	}
964	ahci_write_fis_d2h(p, slot, cfis, tfd);
965}
966
967static void
968atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
969{
970	uint8_t *acmd;
971	uint32_t tfd;
972	uint8_t pc, code;
973	int len;
974
975	acmd = cfis + 0x40;
976	len = be16dec(acmd + 7);
977	pc = acmd[2] >> 6;
978	code = acmd[2] & 0x3f;
979
980	switch (pc) {
981	case 0:
982		switch (code) {
983		case MODEPAGE_RW_ERROR_RECOVERY:
984		{
985			uint8_t buf[16];
986
987			if (len > sizeof(buf))
988				len = sizeof(buf);
989
990			memset(buf, 0, sizeof(buf));
991			be16enc(buf, 16 - 2);
992			buf[2] = 0x70;
993			buf[8] = 0x01;
994			buf[9] = 16 - 10;
995			buf[11] = 0x05;
996			write_prdt(p, slot, cfis, buf, len);
997			tfd = ATA_S_READY | ATA_S_DSC;
998			break;
999		}
1000		case MODEPAGE_CD_CAPABILITIES:
1001		{
1002			uint8_t buf[30];
1003
1004			if (len > sizeof(buf))
1005				len = sizeof(buf);
1006
1007			memset(buf, 0, sizeof(buf));
1008			be16enc(buf, 30 - 2);
1009			buf[2] = 0x70;
1010			buf[8] = 0x2A;
1011			buf[9] = 30 - 10;
1012			buf[10] = 0x08;
1013			buf[12] = 0x71;
1014			be16enc(&buf[18], 2);
1015			be16enc(&buf[20], 512);
1016			write_prdt(p, slot, cfis, buf, len);
1017			tfd = ATA_S_READY | ATA_S_DSC;
1018			break;
1019		}
1020		default:
1021			goto error;
1022			break;
1023		}
1024		break;
1025	case 3:
1026		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1027		p->asc = 0x39;
1028		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1029		break;
1030error:
1031	case 1:
1032	case 2:
1033		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1034		p->asc = 0x24;
1035		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1036		break;
1037	}
1038	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1039	ahci_write_fis_d2h(p, slot, cfis, tfd);
1040}
1041
1042static void
1043atapi_get_event_status_notification(struct ahci_port *p, int slot,
1044    uint8_t *cfis)
1045{
1046	uint8_t *acmd;
1047	uint32_t tfd;
1048
1049	acmd = cfis + 0x40;
1050
1051	/* we don't support asynchronous operation */
1052	if (!(acmd[1] & 1)) {
1053		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1054		p->asc = 0x24;
1055		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1056	} else {
1057		uint8_t buf[8];
1058		int len;
1059
1060		len = be16dec(acmd + 7);
1061		if (len > sizeof(buf))
1062			len = sizeof(buf);
1063
1064		memset(buf, 0, sizeof(buf));
1065		be16enc(buf, 8 - 2);
1066		buf[2] = 0x04;
1067		buf[3] = 0x10;
1068		buf[5] = 0x02;
1069		write_prdt(p, slot, cfis, buf, len);
1070		tfd = ATA_S_READY | ATA_S_DSC;
1071	}
1072	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1073	ahci_write_fis_d2h(p, slot, cfis, tfd);
1074}
1075
1076static void
1077handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1078{
1079	uint8_t *acmd;
1080
1081	acmd = cfis + 0x40;
1082
1083#ifdef AHCI_DEBUG
1084	{
1085		int i;
1086		DPRINTF("ACMD:");
1087		for (i = 0; i < 16; i++)
1088			DPRINTF("%02x ", acmd[i]);
1089		DPRINTF("\n");
1090	}
1091#endif
1092
1093	switch (acmd[0]) {
1094	case TEST_UNIT_READY:
1095		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1096		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1097		break;
1098	case INQUIRY:
1099		atapi_inquiry(p, slot, cfis);
1100		break;
1101	case READ_CAPACITY:
1102		atapi_read_capacity(p, slot, cfis);
1103		break;
1104	case PREVENT_ALLOW:
1105		/* TODO */
1106		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1107		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1108		break;
1109	case READ_TOC:
1110		atapi_read_toc(p, slot, cfis);
1111		break;
1112	case READ_10:
1113	case READ_12:
1114		atapi_read(p, slot, cfis, 0, 0);
1115		break;
1116	case REQUEST_SENSE:
1117		atapi_request_sense(p, slot, cfis);
1118		break;
1119	case START_STOP_UNIT:
1120		atapi_start_stop_unit(p, slot, cfis);
1121		break;
1122	case MODE_SENSE_10:
1123		atapi_mode_sense(p, slot, cfis);
1124		break;
1125	case GET_EVENT_STATUS_NOTIFICATION:
1126		atapi_get_event_status_notification(p, slot, cfis);
1127		break;
1128	default:
1129		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1130		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1131		p->asc = 0x20;
1132		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1133				ATA_S_READY | ATA_S_ERROR);
1134		break;
1135	}
1136}
1137
1138static void
1139ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1140{
1141
1142	switch (cfis[2]) {
1143	case ATA_ATA_IDENTIFY:
1144		handle_identify(p, slot, cfis);
1145		break;
1146	case ATA_SETFEATURES:
1147	{
1148		switch (cfis[3]) {
1149		case ATA_SF_ENAB_WCACHE:
1150		case ATA_SF_DIS_WCACHE:
1151		case ATA_SF_ENAB_RCACHE:
1152		case ATA_SF_DIS_RCACHE:
1153			p->tfd = ATA_S_DSC | ATA_S_READY;
1154			break;
1155		case ATA_SF_SETXFER:
1156		{
1157			switch (cfis[12] & 0xf8) {
1158			case ATA_PIO:
1159			case ATA_PIO0:
1160				break;
1161			case ATA_WDMA0:
1162			case ATA_UDMA0:
1163				p->xfermode = (cfis[12] & 0x7);
1164				break;
1165			}
1166			p->tfd = ATA_S_DSC | ATA_S_READY;
1167			break;
1168		}
1169		default:
1170			p->tfd = ATA_S_ERROR | ATA_S_READY;
1171			p->tfd |= (ATA_ERROR_ABORT << 8);
1172			break;
1173		}
1174		p->is |= AHCI_P_IX_DP;
1175		p->ci &= ~(1 << slot);
1176		ahci_generate_intr(p->pr_sc);
1177		break;
1178	}
1179	case ATA_SET_MULTI:
1180		if (cfis[12] != 0 &&
1181			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1182			p->tfd = ATA_S_ERROR | ATA_S_READY;
1183			p->tfd |= (ATA_ERROR_ABORT << 8);
1184		} else {
1185			p->mult_sectors = cfis[12];
1186			p->tfd = ATA_S_DSC | ATA_S_READY;
1187		}
1188		p->is |= AHCI_P_IX_DP;
1189		p->ci &= ~(1 << slot);
1190		ahci_generate_intr(p->pr_sc);
1191		break;
1192	case ATA_READ_DMA:
1193	case ATA_WRITE_DMA:
1194	case ATA_READ_DMA48:
1195	case ATA_WRITE_DMA48:
1196	case ATA_READ_FPDMA_QUEUED:
1197	case ATA_WRITE_FPDMA_QUEUED:
1198		ahci_handle_dma(p, slot, cfis, 0, 0);
1199		break;
1200	case ATA_FLUSHCACHE:
1201	case ATA_FLUSHCACHE48:
1202		ahci_handle_flush(p, slot, cfis);
1203		break;
1204	case ATA_STANDBY_CMD:
1205		break;
1206	case ATA_NOP:
1207	case ATA_STANDBY_IMMEDIATE:
1208	case ATA_IDLE_IMMEDIATE:
1209	case ATA_SLEEP:
1210		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1211		break;
1212	case ATA_ATAPI_IDENTIFY:
1213		handle_atapi_identify(p, slot, cfis);
1214		break;
1215	case ATA_PACKET_CMD:
1216		if (!p->atapi) {
1217			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1218			p->is |= AHCI_P_IX_TFE;
1219			p->ci &= ~(1 << slot);
1220			ahci_generate_intr(p->pr_sc);
1221		} else
1222			handle_packet_cmd(p, slot, cfis);
1223		break;
1224	default:
1225		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1226		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1227		p->is |= AHCI_P_IX_TFE;
1228		p->ci &= ~(1 << slot);
1229		ahci_generate_intr(p->pr_sc);
1230		break;
1231	}
1232}
1233
1234static void
1235ahci_handle_slot(struct ahci_port *p, int slot)
1236{
1237	struct ahci_cmd_hdr *hdr;
1238	struct ahci_prdt_entry *prdt;
1239	struct pci_ahci_softc *sc;
1240	uint8_t *cfis;
1241	int cfl;
1242
1243	sc = p->pr_sc;
1244	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1245	cfl = (hdr->flags & 0x1f) * 4;
1246	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1247			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1248	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1249
1250#ifdef AHCI_DEBUG
1251	DPRINTF("\ncfis:");
1252	for (i = 0; i < cfl; i++) {
1253		if (i % 10 == 0)
1254			DPRINTF("\n");
1255		DPRINTF("%02x ", cfis[i]);
1256	}
1257	DPRINTF("\n");
1258
1259	for (i = 0; i < hdr->prdtl; i++) {
1260		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1261		prdt++;
1262	}
1263#endif
1264
1265	if (cfis[0] != FIS_TYPE_REGH2D) {
1266		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1267		return;
1268	}
1269
1270	if (cfis[1] & 0x80) {
1271		ahci_handle_cmd(p, slot, cfis);
1272	} else {
1273		if (cfis[15] & (1 << 2))
1274			p->reset = 1;
1275		else if (p->reset) {
1276			p->reset = 0;
1277			ahci_port_reset(p);
1278		}
1279		p->ci &= ~(1 << slot);
1280	}
1281}
1282
1283static void
1284ahci_handle_port(struct ahci_port *p)
1285{
1286	int i;
1287
1288	if (!(p->cmd & AHCI_P_CMD_ST))
1289		return;
1290
1291	for (i = 0; (i < 32) && p->ci; i++) {
1292		if (p->ci & (1 << i))
1293			ahci_handle_slot(p, i);
1294	}
1295}
1296
1297/*
1298 * blockif callback routine - this runs in the context of the blockif
1299 * i/o thread, so the mutex needs to be acquired.
1300 */
1301static void
1302ata_ioreq_cb(struct blockif_req *br, int err)
1303{
1304	struct ahci_cmd_hdr *hdr;
1305	struct ahci_ioreq *aior;
1306	struct ahci_port *p;
1307	struct pci_ahci_softc *sc;
1308	uint32_t tfd;
1309	uint8_t *cfis;
1310	int pending, slot, ncq;
1311
1312	DPRINTF("%s %d\n", __func__, err);
1313
1314	ncq = 0;
1315	aior = br->br_param;
1316	p = aior->io_pr;
1317	cfis = aior->cfis;
1318	slot = aior->slot;
1319	pending = aior->prdtl;
1320	sc = p->pr_sc;
1321	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1322
1323	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1324			cfis[2] == ATA_READ_FPDMA_QUEUED)
1325		ncq = 1;
1326
1327	pthread_mutex_lock(&sc->mtx);
1328
1329	/*
1330	 * Move the blockif request back to the free list
1331	 */
1332	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1333
1334	if (pending && !err) {
1335		ahci_handle_dma(p, slot, cfis, aior->done,
1336		    hdr->prdtl - pending);
1337		goto out;
1338	}
1339
1340	if (!err && aior->done == aior->len) {
1341		tfd = ATA_S_READY | ATA_S_DSC;
1342		if (ncq)
1343			hdr->prdbc = 0;
1344		else
1345			hdr->prdbc = aior->len;
1346	} else {
1347		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1348		hdr->prdbc = 0;
1349		if (ncq)
1350			p->serr |= (1 << slot);
1351	}
1352
1353	if (ncq) {
1354		p->sact &= ~(1 << slot);
1355		ahci_write_fis_sdb(p, slot, tfd);
1356	} else
1357		ahci_write_fis_d2h(p, slot, cfis, tfd);
1358
1359out:
1360	pthread_mutex_unlock(&sc->mtx);
1361	DPRINTF("%s exit\n", __func__);
1362}
1363
1364static void
1365atapi_ioreq_cb(struct blockif_req *br, int err)
1366{
1367	struct ahci_cmd_hdr *hdr;
1368	struct ahci_ioreq *aior;
1369	struct ahci_port *p;
1370	struct pci_ahci_softc *sc;
1371	uint8_t *cfis;
1372	uint32_t tfd;
1373	int pending, slot;
1374
1375	DPRINTF("%s %d\n", __func__, err);
1376
1377	aior = br->br_param;
1378	p = aior->io_pr;
1379	cfis = aior->cfis;
1380	slot = aior->slot;
1381	pending = aior->prdtl;
1382	sc = p->pr_sc;
1383	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1384
1385	pthread_mutex_lock(&sc->mtx);
1386
1387	/*
1388	 * Move the blockif request back to the free list
1389	 */
1390	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1391
1392	if (pending && !err) {
1393		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1394		goto out;
1395	}
1396
1397	if (!err && aior->done == aior->len) {
1398		tfd = ATA_S_READY | ATA_S_DSC;
1399		hdr->prdbc = aior->len;
1400	} else {
1401		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1402		p->asc = 0x21;
1403		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1404		hdr->prdbc = 0;
1405	}
1406
1407	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1408	ahci_write_fis_d2h(p, slot, cfis, tfd);
1409
1410out:
1411	pthread_mutex_unlock(&sc->mtx);
1412	DPRINTF("%s exit\n", __func__);
1413}
1414
1415static void
1416pci_ahci_ioreq_init(struct ahci_port *pr)
1417{
1418	struct ahci_ioreq *vr;
1419	int i;
1420
1421	pr->ioqsz = blockif_queuesz(pr->bctx);
1422	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1423	STAILQ_INIT(&pr->iofhd);
1424
1425	/*
1426	 * Add all i/o request entries to the free queue
1427	 */
1428	for (i = 0; i < pr->ioqsz; i++) {
1429		vr = &pr->ioreq[i];
1430		vr->io_pr = pr;
1431		if (!pr->atapi)
1432			vr->io_req.br_callback = ata_ioreq_cb;
1433		else
1434			vr->io_req.br_callback = atapi_ioreq_cb;
1435		vr->io_req.br_param = vr;
1436		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1437	}
1438}
1439
1440static void
1441pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1442{
1443	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1444	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1445	struct ahci_port *p = &sc->port[port];
1446
1447	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1448		port, offset, value);
1449
1450	switch (offset) {
1451	case AHCI_P_CLB:
1452		p->clb = value;
1453		break;
1454	case AHCI_P_CLBU:
1455		p->clbu = value;
1456		break;
1457	case AHCI_P_FB:
1458		p->fb = value;
1459		break;
1460	case AHCI_P_FBU:
1461		p->fbu = value;
1462		break;
1463	case AHCI_P_IS:
1464		p->is &= ~value;
1465		break;
1466	case AHCI_P_IE:
1467		p->ie = value & 0xFDC000FF;
1468		ahci_generate_intr(sc);
1469		break;
1470	case AHCI_P_CMD:
1471	{
1472		p->cmd = value;
1473
1474		if (!(value & AHCI_P_CMD_ST)) {
1475			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1476			p->ci = 0;
1477			p->sact = 0;
1478		} else {
1479			uint64_t clb;
1480
1481			p->cmd |= AHCI_P_CMD_CR;
1482			clb = (uint64_t)p->clbu << 32 | p->clb;
1483			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1484					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1485		}
1486
1487		if (value & AHCI_P_CMD_FRE) {
1488			uint64_t fb;
1489
1490			p->cmd |= AHCI_P_CMD_FR;
1491			fb = (uint64_t)p->fbu << 32 | p->fb;
1492			/* we don't support FBSCP, so rfis size is 256Bytes */
1493			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1494		} else {
1495			p->cmd &= ~AHCI_P_CMD_FR;
1496		}
1497
1498		if (value & AHCI_P_CMD_CLO) {
1499			p->tfd = 0;
1500			p->cmd &= ~AHCI_P_CMD_CLO;
1501		}
1502
1503		ahci_handle_port(p);
1504		break;
1505	}
1506	case AHCI_P_TFD:
1507	case AHCI_P_SIG:
1508	case AHCI_P_SSTS:
1509		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1510		break;
1511	case AHCI_P_SCTL:
1512		if (!(p->cmd & AHCI_P_CMD_ST)) {
1513			if (value & ATA_SC_DET_RESET)
1514				ahci_port_reset(p);
1515			p->sctl = value;
1516		}
1517		break;
1518	case AHCI_P_SERR:
1519		p->serr &= ~value;
1520		break;
1521	case AHCI_P_SACT:
1522		p->sact |= value;
1523		break;
1524	case AHCI_P_CI:
1525		p->ci |= value;
1526		ahci_handle_port(p);
1527		break;
1528	case AHCI_P_SNTF:
1529	case AHCI_P_FBS:
1530	default:
1531		break;
1532	}
1533}
1534
1535static void
1536pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1537{
1538	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1539		offset, value);
1540
1541	switch (offset) {
1542	case AHCI_CAP:
1543	case AHCI_PI:
1544	case AHCI_VS:
1545	case AHCI_CAP2:
1546		WPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1547		break;
1548	case AHCI_GHC:
1549		if (value & AHCI_GHC_HR)
1550			ahci_reset(sc);
1551		else if (value & AHCI_GHC_IE) {
1552			sc->ghc |= AHCI_GHC_IE;
1553			ahci_generate_intr(sc);
1554		}
1555		break;
1556	case AHCI_IS:
1557		sc->is &= ~value;
1558		ahci_generate_intr(sc);
1559		break;
1560	default:
1561		break;
1562	}
1563}
1564
1565static void
1566pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1567		int baridx, uint64_t offset, int size, uint64_t value)
1568{
1569	struct pci_ahci_softc *sc = pi->pi_arg;
1570
1571	assert(baridx == 5);
1572	assert(size == 4);
1573
1574	pthread_mutex_lock(&sc->mtx);
1575
1576	if (offset < AHCI_OFFSET)
1577		pci_ahci_host_write(sc, offset, value);
1578	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1579		pci_ahci_port_write(sc, offset, value);
1580	else
1581		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1582
1583	pthread_mutex_unlock(&sc->mtx);
1584}
1585
1586static uint64_t
1587pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1588{
1589	uint32_t value;
1590
1591	switch (offset) {
1592	case AHCI_CAP:
1593	case AHCI_GHC:
1594	case AHCI_IS:
1595	case AHCI_PI:
1596	case AHCI_VS:
1597	case AHCI_CCCC:
1598	case AHCI_CCCP:
1599	case AHCI_EM_LOC:
1600	case AHCI_EM_CTL:
1601	case AHCI_CAP2:
1602	{
1603		uint32_t *p = &sc->cap;
1604		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1605		value = *p;
1606		break;
1607	}
1608	default:
1609		value = 0;
1610		break;
1611	}
1612	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1613		offset, value);
1614
1615	return (value);
1616}
1617
1618static uint64_t
1619pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1620{
1621	uint32_t value;
1622	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1623	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1624
1625	switch (offset) {
1626	case AHCI_P_CLB:
1627	case AHCI_P_CLBU:
1628	case AHCI_P_FB:
1629	case AHCI_P_FBU:
1630	case AHCI_P_IS:
1631	case AHCI_P_IE:
1632	case AHCI_P_CMD:
1633	case AHCI_P_TFD:
1634	case AHCI_P_SIG:
1635	case AHCI_P_SSTS:
1636	case AHCI_P_SCTL:
1637	case AHCI_P_SERR:
1638	case AHCI_P_SACT:
1639	case AHCI_P_CI:
1640	case AHCI_P_SNTF:
1641	case AHCI_P_FBS:
1642	{
1643		uint32_t *p= &sc->port[port].clb;
1644		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1645		value = *p;
1646		break;
1647	}
1648	default:
1649		value = 0;
1650		break;
1651	}
1652
1653	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1654		port, offset, value);
1655
1656	return value;
1657}
1658
1659static uint64_t
1660pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1661    uint64_t offset, int size)
1662{
1663	struct pci_ahci_softc *sc = pi->pi_arg;
1664	uint32_t value;
1665
1666	assert(baridx == 5);
1667	assert(size == 4);
1668
1669	pthread_mutex_lock(&sc->mtx);
1670
1671	if (offset < AHCI_OFFSET)
1672		value = pci_ahci_host_read(sc, offset);
1673	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1674		value = pci_ahci_port_read(sc, offset);
1675	else {
1676		value = 0;
1677		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1678	}
1679
1680	pthread_mutex_unlock(&sc->mtx);
1681
1682	return (value);
1683}
1684
1685static int
1686pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1687{
1688	char bident[sizeof("XX:X:X")];
1689	struct blockif_ctxt *bctxt;
1690	struct pci_ahci_softc *sc;
1691	int ret, slots;
1692
1693	ret = 0;
1694
1695	if (opts == NULL) {
1696		fprintf(stderr, "pci_ahci: backing device required\n");
1697		return (1);
1698	}
1699
1700#ifdef AHCI_DEBUG
1701	dbg = fopen("/tmp/log", "w+");
1702#endif
1703
1704       	sc = malloc(sizeof(struct pci_ahci_softc));
1705	memset(sc, 0, sizeof(struct pci_ahci_softc));
1706	pi->pi_arg = sc;
1707	sc->asc_pi = pi;
1708	sc->ports = MAX_PORTS;
1709
1710	/*
1711	 * Only use port 0 for a backing device. All other ports will be
1712	 * marked as unused
1713	 */
1714	sc->port[0].atapi = atapi;
1715
1716	/*
1717	 * Attempt to open the backing image. Use the PCI
1718	 * slot/func/ahci_port for the identifier string
1719	 * since that uniquely identifies a storage device.
1720	 */
1721	snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot, pi->pi_func,
1722	    0);
1723	bctxt = blockif_open(opts, bident);
1724	if (bctxt == NULL) {
1725		ret = 1;
1726		goto open_fail;
1727	}
1728	sc->port[0].bctx = bctxt;
1729	sc->port[0].pr_sc = sc;
1730
1731	/*
1732	 * Allocate blockif request structures and add them
1733	 * to the free list
1734	 */
1735	pci_ahci_ioreq_init(&sc->port[0]);
1736
1737	pthread_mutex_init(&sc->mtx, NULL);
1738
1739	/* Intel ICH8 AHCI */
1740	slots = sc->port[0].ioqsz;
1741	if (slots > 32)
1742		slots = 32;
1743	--slots;
1744	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1745	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1746	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1747	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1748	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1749
1750	/* Only port 0 implemented */
1751	sc->pi = 1;
1752	sc->vs = 0x10300;
1753	sc->cap2 = AHCI_CAP2_APST;
1754	ahci_reset(sc);
1755
1756	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1757	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1758	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1759	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1760	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1761	pci_emul_add_msicap(pi, 1);
1762	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1763	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1764
1765open_fail:
1766	if (ret) {
1767		blockif_close(sc->port[0].bctx);
1768		free(sc);
1769	}
1770
1771	return (ret);
1772}
1773
/*
 * Init entry point for the "ahci-hd" emulation: set the controller up
 * with port 0 as a disk (non-ATAPI) device.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1780
/*
 * Init entry point for the "ahci-cd" emulation: set the controller up
 * with port 0 as an ATAPI device.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1787
1788/*
1789 * Use separate emulation names to distinguish drive and atapi devices
1790 */
1791struct pci_devemu pci_de_ahci_hd = {
1792	.pe_emu =	"ahci-hd",
1793	.pe_init =	pci_ahci_hd_init,
1794	.pe_barwrite =	pci_ahci_write,
1795	.pe_barread =	pci_ahci_read
1796};
1797PCI_EMUL_SET(pci_de_ahci_hd);
1798
1799struct pci_devemu pci_de_ahci_cd = {
1800	.pe_emu =	"ahci-cd",
1801	.pe_init =	pci_ahci_atapi_init,
1802	.pe_barwrite =	pci_ahci_write,
1803	.pe_barread =	pci_ahci_read
1804};
1805PCI_EMUL_SET(pci_de_ahci_cd);
1806