1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: releng/11.0/usr.sbin/bhyve/pci_ahci.c 303138 2016-07-21 11:57:41Z mav $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/pci_ahci.c 303138 2016-07-21 11:57:41Z mav $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53#include <md5.h>
54
55#include "bhyverun.h"
56#include "pci_emul.h"
57#include "ahci.h"
58#include "block_if.h"
59
/* Upper bound on emulated ports (the Intel ICH8 AHCI controller has 6). */
#define	MAX_PORTS	6

/* Device signatures loaded into a port's 'sig' field on reset. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
64
/*
 * SATA Frame Information Structure (FIS) type codes.  Only the
 * device-to-host register, set-device-bits and PIO-setup types are
 * accepted by ahci_write_fis().
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS, host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS, device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS, device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS, bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS, bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS, bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS, device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set device bits FIS, device to host */
};
75
/*
 * SCSI command opcodes (first byte of the ATAPI command packet).
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE

/*
 * SCSI mode page codes.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA feature codes.
 * NOTE(review): the ATA_SF_ prefix suggests SET FEATURES sub-commands;
 * their users are not visible in this part of the file - confirm.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
106
/*
 * Diagnostic output.
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it, but only when the
 * file is built with AHCI_DEBUG; otherwise it expands to nothing.
 * WPRINTF() always prints to stdout and expands to the printf() call
 * itself, so it remains usable as an expression.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, ...)						\
	do {								\
		fprintf(dbg, format, ##__VA_ARGS__);			\
		fflush(dbg);						\
	} while (0)
#else
#define	DPRINTF(format, ...)
#endif
#define	WPRINTF(format, ...)	printf(format, ##__VA_ARGS__)
117
118struct ahci_ioreq {
119	struct blockif_req io_req;
120	struct ahci_port *io_pr;
121	STAILQ_ENTRY(ahci_ioreq) io_flist;
122	TAILQ_ENTRY(ahci_ioreq) io_blist;
123	uint8_t *cfis;
124	uint32_t len;
125	uint32_t done;
126	int slot;
127	int more;
128};
129
130struct ahci_port {
131	struct blockif_ctxt *bctx;
132	struct pci_ahci_softc *pr_sc;
133	uint8_t *cmd_lst;
134	uint8_t *rfis;
135	char ident[20 + 1];
136	int atapi;
137	int reset;
138	int waitforclear;
139	int mult_sectors;
140	uint8_t xfermode;
141	uint8_t err_cfis[20];
142	uint8_t sense_key;
143	uint8_t asc;
144	u_int ccs;
145	uint32_t pending;
146
147	uint32_t clb;
148	uint32_t clbu;
149	uint32_t fb;
150	uint32_t fbu;
151	uint32_t is;
152	uint32_t ie;
153	uint32_t cmd;
154	uint32_t unused0;
155	uint32_t tfd;
156	uint32_t sig;
157	uint32_t ssts;
158	uint32_t sctl;
159	uint32_t serr;
160	uint32_t sact;
161	uint32_t ci;
162	uint32_t sntf;
163	uint32_t fbs;
164
165	/*
166	 * i/o request info
167	 */
168	struct ahci_ioreq *ioreq;
169	int ioqsz;
170	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
171	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
172};
173
/*
 * One command-list entry as laid out in guest memory; the code in this
 * file reads 'prdtl' (number of PRDT entries) and stores the transferred
 * byte count in 'prdbc'.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* PRDT length, in entries */
	uint32_t prdbc;		/* byte count actually transferred */
	uint64_t ctba;
	uint32_t reserved[4];
};
181
/* Low bits of 'dbc' hold the segment's byte count minus one. */
#define	DBCMASK		0x3fffff

/*
 * One physical region descriptor: a guest-physical buffer address and
 * its length.  The table starts at offset 0x80 from the command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the segment */
	uint32_t reserved;
	uint32_t dbc;		/* (byte count - 1) in DBCMASK */
};
188
189struct pci_ahci_softc {
190	struct pci_devinst *asc_pi;
191	pthread_mutex_t	mtx;
192	int ports;
193	uint32_t cap;
194	uint32_t ghc;
195	uint32_t is;
196	uint32_t pi;
197	uint32_t vs;
198	uint32_t ccc_ctl;
199	uint32_t ccc_pts;
200	uint32_t em_loc;
201	uint32_t em_ctl;
202	uint32_t cap2;
203	uint32_t bohc;
204	uint32_t lintr;
205	struct ahci_port port[MAX_PORTS];
206};
207#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
208
209static void ahci_handle_port(struct ahci_port *p);
210
/*
 * Convert a logical block address into a 3-byte minute/second/frame
 * triple: 150 frames are added first, then the value is split using
 * 75 frames per second and 60 seconds per minute.
 *
 * buf receives minute, second and frame in buf[0], buf[1] and buf[2].
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
218
219/*
220 * generate HBA intr depending on whether or not ports within
221 * the controller have an interrupt pending.
222 */
223static void
224ahci_generate_intr(struct pci_ahci_softc *sc)
225{
226	struct pci_devinst *pi;
227	int i;
228
229	pi = sc->asc_pi;
230
231	for (i = 0; i < sc->ports; i++) {
232		struct ahci_port *pr;
233		pr = &sc->port[i];
234		if (pr->is & pr->ie)
235			sc->is |= (1 << i);
236	}
237
238	DPRINTF("%s %x\n", __func__, sc->is);
239
240	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
241		if (pci_msi_enabled(pi)) {
242			/*
243			 * Generate an MSI interrupt on every edge
244			 */
245			pci_generate_msi(pi, 0);
246		} else if (!sc->lintr) {
247			/*
248			 * Only generate a pin-based interrupt if one wasn't
249			 * in progress
250			 */
251			sc->lintr = 1;
252			pci_lintr_assert(pi);
253		}
254	} else if (sc->lintr) {
255		/*
256		 * No interrupts: deassert pin-based signal if it had
257		 * been asserted
258		 */
259		pci_lintr_deassert(pi);
260		sc->lintr = 0;
261	}
262}
263
264static void
265ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
266{
267	int offset, len, irq;
268
269	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
270		return;
271
272	switch (ft) {
273	case FIS_TYPE_REGD2H:
274		offset = 0x40;
275		len = 20;
276		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
277		break;
278	case FIS_TYPE_SETDEVBITS:
279		offset = 0x58;
280		len = 8;
281		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
282		break;
283	case FIS_TYPE_PIOSETUP:
284		offset = 0x20;
285		len = 20;
286		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
287		break;
288	default:
289		WPRINTF("unsupported fis type %d\n", ft);
290		return;
291	}
292	if (fis[2] & ATA_S_ERROR) {
293		p->waitforclear = 1;
294		irq |= AHCI_P_IX_TFE;
295	}
296	memcpy(p->rfis + offset, fis, len);
297	if (irq) {
298		p->is |= irq;
299		ahci_generate_intr(p->pr_sc);
300	}
301}
302
303static void
304ahci_write_fis_piosetup(struct ahci_port *p)
305{
306	uint8_t fis[20];
307
308	memset(fis, 0, sizeof(fis));
309	fis[0] = FIS_TYPE_PIOSETUP;
310	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
311}
312
313static void
314ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
315{
316	uint8_t fis[8];
317	uint8_t error;
318
319	error = (tfd >> 8) & 0xff;
320	tfd &= 0x77;
321	memset(fis, 0, sizeof(fis));
322	fis[0] = FIS_TYPE_SETDEVBITS;
323	fis[1] = (1 << 6);
324	fis[2] = tfd;
325	fis[3] = error;
326	if (fis[2] & ATA_S_ERROR) {
327		p->err_cfis[0] = slot;
328		p->err_cfis[2] = tfd;
329		p->err_cfis[3] = error;
330		memcpy(&p->err_cfis[4], cfis + 4, 16);
331	} else {
332		*(uint32_t *)(fis + 4) = (1 << slot);
333		p->sact &= ~(1 << slot);
334	}
335	p->tfd &= ~0x77;
336	p->tfd |= tfd;
337	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
338}
339
340static void
341ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
342{
343	uint8_t fis[20];
344	uint8_t error;
345
346	error = (tfd >> 8) & 0xff;
347	memset(fis, 0, sizeof(fis));
348	fis[0] = FIS_TYPE_REGD2H;
349	fis[1] = (1 << 6);
350	fis[2] = tfd & 0xff;
351	fis[3] = error;
352	fis[4] = cfis[4];
353	fis[5] = cfis[5];
354	fis[6] = cfis[6];
355	fis[7] = cfis[7];
356	fis[8] = cfis[8];
357	fis[9] = cfis[9];
358	fis[10] = cfis[10];
359	fis[11] = cfis[11];
360	fis[12] = cfis[12];
361	fis[13] = cfis[13];
362	if (fis[2] & ATA_S_ERROR) {
363		p->err_cfis[0] = 0x80;
364		p->err_cfis[2] = tfd & 0xff;
365		p->err_cfis[3] = error;
366		memcpy(&p->err_cfis[4], cfis + 4, 16);
367	} else
368		p->ci &= ~(1 << slot);
369	p->tfd = tfd;
370	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
371}
372
373static void
374ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
375{
376	uint8_t fis[20];
377
378	p->tfd = ATA_S_READY | ATA_S_DSC;
379	memset(fis, 0, sizeof(fis));
380	fis[0] = FIS_TYPE_REGD2H;
381	fis[1] = 0;			/* No interrupt */
382	fis[2] = p->tfd;		/* Status */
383	fis[3] = 0;			/* No error */
384	p->ci &= ~(1 << slot);
385	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
386}
387
388static void
389ahci_write_reset_fis_d2h(struct ahci_port *p)
390{
391	uint8_t fis[20];
392
393	memset(fis, 0, sizeof(fis));
394	fis[0] = FIS_TYPE_REGD2H;
395	fis[3] = 1;
396	fis[4] = 1;
397	if (p->atapi) {
398		fis[5] = 0x14;
399		fis[6] = 0xeb;
400	}
401	fis[12] = 1;
402	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
403}
404
405static void
406ahci_check_stopped(struct ahci_port *p)
407{
408	/*
409	 * If we are no longer processing the command list and nothing
410	 * is in-flight, clear the running bit, the current command
411	 * slot, the command issue and active bits.
412	 */
413	if (!(p->cmd & AHCI_P_CMD_ST)) {
414		if (p->pending == 0) {
415			p->ccs = 0;
416			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
417			p->ci = 0;
418			p->sact = 0;
419			p->waitforclear = 0;
420		}
421	}
422}
423
424static void
425ahci_port_stop(struct ahci_port *p)
426{
427	struct ahci_ioreq *aior;
428	uint8_t *cfis;
429	int slot;
430	int error;
431
432	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
433
434	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
435		/*
436		 * Try to cancel the outstanding blockif request.
437		 */
438		error = blockif_cancel(p->bctx, &aior->io_req);
439		if (error != 0)
440			continue;
441
442		slot = aior->slot;
443		cfis = aior->cfis;
444		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
445		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
446		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
447			p->sact &= ~(1 << slot);	/* NCQ */
448		else
449			p->ci &= ~(1 << slot);
450
451		/*
452		 * This command is now done.
453		 */
454		p->pending &= ~(1 << slot);
455
456		/*
457		 * Delete the blockif request from the busy list
458		 */
459		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
460
461		/*
462		 * Move the blockif request back to the free list
463		 */
464		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
465	}
466
467	ahci_check_stopped(p);
468}
469
470static void
471ahci_port_reset(struct ahci_port *pr)
472{
473	pr->serr = 0;
474	pr->sact = 0;
475	pr->xfermode = ATA_UDMA6;
476	pr->mult_sectors = 128;
477
478	if (!pr->bctx) {
479		pr->ssts = ATA_SS_DET_NO_DEVICE;
480		pr->sig = 0xFFFFFFFF;
481		pr->tfd = 0x7F;
482		return;
483	}
484	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
485	if (pr->sctl & ATA_SC_SPD_MASK)
486		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
487	else
488		pr->ssts |= ATA_SS_SPD_GEN3;
489	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
490	if (!pr->atapi) {
491		pr->sig = PxSIG_ATA;
492		pr->tfd |= ATA_S_READY;
493	} else
494		pr->sig = PxSIG_ATAPI;
495	ahci_write_reset_fis_d2h(pr);
496}
497
498static void
499ahci_reset(struct pci_ahci_softc *sc)
500{
501	int i;
502
503	sc->ghc = AHCI_GHC_AE;
504	sc->is = 0;
505
506	if (sc->lintr) {
507		pci_lintr_deassert(sc->asc_pi);
508		sc->lintr = 0;
509	}
510
511	for (i = 0; i < sc->ports; i++) {
512		sc->port[i].ie = 0;
513		sc->port[i].is = 0;
514		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
515		if (sc->port[i].bctx)
516			sc->port[i].cmd |= AHCI_P_CMD_CPS;
517		sc->port[i].sctl = 0;
518		ahci_port_reset(&sc->port[i]);
519	}
520}
521
/*
 * Copy 'src' into an ATA identify string field of 'len' bytes, padding
 * with spaces once the source is exhausted.  The two bytes of every pair
 * are swapped (index i is written at i ^ 1), so 'len' is expected to be
 * even.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	uint8_t ch;
	int pos;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
534
/*
 * Copy 'src' into a fixed-width field of 'len' bytes in natural byte
 * order, padding with spaces once the source is exhausted.  No
 * terminating NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
547
548/*
549 * Build up the iovec based on the PRDT, 'done' and 'len'.
550 */
551static void
552ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
553    struct ahci_prdt_entry *prdt, uint16_t prdtl)
554{
555	struct blockif_req *breq = &aior->io_req;
556	int i, j, skip, todo, left, extra;
557	uint32_t dbcsz;
558
559	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
560	skip = aior->done;
561	left = aior->len - aior->done;
562	todo = 0;
563	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
564	    i++, prdt++) {
565		dbcsz = (prdt->dbc & DBCMASK) + 1;
566		/* Skip already done part of the PRDT */
567		if (dbcsz <= skip) {
568			skip -= dbcsz;
569			continue;
570		}
571		dbcsz -= skip;
572		if (dbcsz > left)
573			dbcsz = left;
574		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
575		    prdt->dba + skip, dbcsz);
576		breq->br_iov[j].iov_len = dbcsz;
577		todo += dbcsz;
578		left -= dbcsz;
579		skip = 0;
580		j++;
581	}
582
583	/* If we got limited by IOV length, round I/O down to sector size. */
584	if (j == BLOCKIF_IOV_MAX) {
585		extra = todo % blockif_sectsz(p->bctx);
586		todo -= extra;
587		assert(todo > 0);
588		while (extra > 0) {
589			if (breq->br_iov[j - 1].iov_len > extra) {
590				breq->br_iov[j - 1].iov_len -= extra;
591				break;
592			}
593			extra -= breq->br_iov[j - 1].iov_len;
594			j--;
595		}
596	}
597
598	breq->br_iovcnt = j;
599	breq->br_resid = todo;
600	aior->done += todo;
601	aior->more = (aior->done < aior->len && i < prdtl);
602}
603
604static void
605ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
606{
607	struct ahci_ioreq *aior;
608	struct blockif_req *breq;
609	struct ahci_prdt_entry *prdt;
610	struct ahci_cmd_hdr *hdr;
611	uint64_t lba;
612	uint32_t len;
613	int err, first, ncq, readop;
614
615	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
616	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
617	ncq = 0;
618	readop = 1;
619	first = (done == 0);
620
621	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
622	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
623	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
624	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
625		readop = 0;
626
627	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
628	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
629		lba = ((uint64_t)cfis[10] << 40) |
630			((uint64_t)cfis[9] << 32) |
631			((uint64_t)cfis[8] << 24) |
632			((uint64_t)cfis[6] << 16) |
633			((uint64_t)cfis[5] << 8) |
634			cfis[4];
635		len = cfis[11] << 8 | cfis[3];
636		if (!len)
637			len = 65536;
638		ncq = 1;
639	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
640	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
641	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
642		lba = ((uint64_t)cfis[10] << 40) |
643			((uint64_t)cfis[9] << 32) |
644			((uint64_t)cfis[8] << 24) |
645			((uint64_t)cfis[6] << 16) |
646			((uint64_t)cfis[5] << 8) |
647			cfis[4];
648		len = cfis[13] << 8 | cfis[12];
649		if (!len)
650			len = 65536;
651	} else {
652		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
653			(cfis[5] << 8) | cfis[4];
654		len = cfis[12];
655		if (!len)
656			len = 256;
657	}
658	lba *= blockif_sectsz(p->bctx);
659	len *= blockif_sectsz(p->bctx);
660
661	/* Pull request off free list */
662	aior = STAILQ_FIRST(&p->iofhd);
663	assert(aior != NULL);
664	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
665
666	aior->cfis = cfis;
667	aior->slot = slot;
668	aior->len = len;
669	aior->done = done;
670	breq = &aior->io_req;
671	breq->br_offset = lba + done;
672	ahci_build_iov(p, aior, prdt, hdr->prdtl);
673
674	/* Mark this command in-flight. */
675	p->pending |= 1 << slot;
676
677	/* Stuff request onto busy list. */
678	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
679
680	if (ncq && first)
681		ahci_write_fis_d2h_ncq(p, slot);
682
683	if (readop)
684		err = blockif_read(p->bctx, breq);
685	else
686		err = blockif_write(p->bctx, breq);
687	assert(err == 0);
688}
689
690static void
691ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
692{
693	struct ahci_ioreq *aior;
694	struct blockif_req *breq;
695	int err;
696
697	/*
698	 * Pull request off free list
699	 */
700	aior = STAILQ_FIRST(&p->iofhd);
701	assert(aior != NULL);
702	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
703	aior->cfis = cfis;
704	aior->slot = slot;
705	aior->len = 0;
706	aior->done = 0;
707	aior->more = 0;
708	breq = &aior->io_req;
709
710	/*
711	 * Mark this command in-flight.
712	 */
713	p->pending |= 1 << slot;
714
715	/*
716	 * Stuff request onto busy list
717	 */
718	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
719
720	err = blockif_flush(p->bctx, breq);
721	assert(err == 0);
722}
723
724static inline void
725read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
726		void *buf, int size)
727{
728	struct ahci_cmd_hdr *hdr;
729	struct ahci_prdt_entry *prdt;
730	void *to;
731	int i, len;
732
733	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
734	len = size;
735	to = buf;
736	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
737	for (i = 0; i < hdr->prdtl && len; i++) {
738		uint8_t *ptr;
739		uint32_t dbcsz;
740		int sublen;
741
742		dbcsz = (prdt->dbc & DBCMASK) + 1;
743		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
744		sublen = MIN(len, dbcsz);
745		memcpy(to, ptr, sublen);
746		len -= sublen;
747		to += sublen;
748		prdt++;
749	}
750}
751
752static void
753ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
754{
755	struct ahci_ioreq *aior;
756	struct blockif_req *breq;
757	uint8_t *entry;
758	uint64_t elba;
759	uint32_t len, elen;
760	int err, first, ncq;
761	uint8_t buf[512];
762
763	first = (done == 0);
764	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
765		len = (uint16_t)cfis[13] << 8 | cfis[12];
766		len *= 512;
767		ncq = 0;
768	} else { /* ATA_SEND_FPDMA_QUEUED */
769		len = (uint16_t)cfis[11] << 8 | cfis[3];
770		len *= 512;
771		ncq = 1;
772	}
773	read_prdt(p, slot, cfis, buf, sizeof(buf));
774
775next:
776	entry = &buf[done];
777	elba = ((uint64_t)entry[5] << 40) |
778		((uint64_t)entry[4] << 32) |
779		((uint64_t)entry[3] << 24) |
780		((uint64_t)entry[2] << 16) |
781		((uint64_t)entry[1] << 8) |
782		entry[0];
783	elen = (uint16_t)entry[7] << 8 | entry[6];
784	done += 8;
785	if (elen == 0) {
786		if (done >= len) {
787			if (ncq) {
788				if (first)
789					ahci_write_fis_d2h_ncq(p, slot);
790				ahci_write_fis_sdb(p, slot, cfis,
791				    ATA_S_READY | ATA_S_DSC);
792			} else {
793				ahci_write_fis_d2h(p, slot, cfis,
794				    ATA_S_READY | ATA_S_DSC);
795			}
796			p->pending &= ~(1 << slot);
797			ahci_check_stopped(p);
798			if (!first)
799				ahci_handle_port(p);
800			return;
801		}
802		goto next;
803	}
804
805	/*
806	 * Pull request off free list
807	 */
808	aior = STAILQ_FIRST(&p->iofhd);
809	assert(aior != NULL);
810	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
811	aior->cfis = cfis;
812	aior->slot = slot;
813	aior->len = len;
814	aior->done = done;
815	aior->more = (len != done);
816
817	breq = &aior->io_req;
818	breq->br_offset = elba * blockif_sectsz(p->bctx);
819	breq->br_resid = elen * blockif_sectsz(p->bctx);
820
821	/*
822	 * Mark this command in-flight.
823	 */
824	p->pending |= 1 << slot;
825
826	/*
827	 * Stuff request onto busy list
828	 */
829	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
830
831	if (ncq && first)
832		ahci_write_fis_d2h_ncq(p, slot);
833
834	err = blockif_delete(p->bctx, breq);
835	assert(err == 0);
836}
837
838static inline void
839write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
840		void *buf, int size)
841{
842	struct ahci_cmd_hdr *hdr;
843	struct ahci_prdt_entry *prdt;
844	void *from;
845	int i, len;
846
847	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
848	len = size;
849	from = buf;
850	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
851	for (i = 0; i < hdr->prdtl && len; i++) {
852		uint8_t *ptr;
853		uint32_t dbcsz;
854		int sublen;
855
856		dbcsz = (prdt->dbc & DBCMASK) + 1;
857		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
858		sublen = MIN(len, dbcsz);
859		memcpy(ptr, from, sublen);
860		len -= sublen;
861		from += sublen;
862		prdt++;
863	}
864	hdr->prdbc = size - len;
865}
866
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of all
 * 'size' bytes is zero.  The previous content of the last byte is
 * ignored.  A non-positive 'size' leaves the buffer untouched.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int i;
	uint8_t sum = 0;

	/* There is no last byte to fill in; writing buf[-1] would be a bug. */
	if (size <= 0)
		return;

	for (i = 0; i < size - 1; i++)
		sum += buf[i];
	buf[size - 1] = (uint8_t)(0x100 - sum);
}
877
878static void
879ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
880{
881	struct ahci_cmd_hdr *hdr;
882	uint8_t buf[512];
883
884	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
885	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
886	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
887		ahci_write_fis_d2h(p, slot, cfis,
888		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
889		return;
890	}
891
892	memset(buf, 0, sizeof(buf));
893	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
894	ahci_checksum(buf, sizeof(buf));
895
896	if (cfis[2] == ATA_READ_LOG_EXT)
897		ahci_write_fis_piosetup(p);
898	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
899	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
900}
901
902static void
903handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
904{
905	struct ahci_cmd_hdr *hdr;
906
907	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
908	if (p->atapi || hdr->prdtl == 0) {
909		ahci_write_fis_d2h(p, slot, cfis,
910		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
911	} else {
912		uint16_t buf[256];
913		uint64_t sectors;
914		int sectsz, psectsz, psectoff, candelete, ro;
915		uint16_t cyl;
916		uint8_t sech, heads;
917
918		ro = blockif_is_ro(p->bctx);
919		candelete = blockif_candelete(p->bctx);
920		sectsz = blockif_sectsz(p->bctx);
921		sectors = blockif_size(p->bctx) / sectsz;
922		blockif_chs(p->bctx, &cyl, &heads, &sech);
923		blockif_psectsz(p->bctx, &psectsz, &psectoff);
924		memset(buf, 0, sizeof(buf));
925		buf[0] = 0x0040;
926		buf[1] = cyl;
927		buf[3] = heads;
928		buf[6] = sech;
929		ata_string((uint8_t *)(buf+10), p->ident, 20);
930		ata_string((uint8_t *)(buf+23), "001", 8);
931		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
932		buf[47] = (0x8000 | 128);
933		buf[48] = 0;
934		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
935		buf[50] = (1 << 14);
936		buf[53] = (1 << 1 | 1 << 2);
937		if (p->mult_sectors)
938			buf[59] = (0x100 | p->mult_sectors);
939		if (sectors <= 0x0fffffff) {
940			buf[60] = sectors;
941			buf[61] = (sectors >> 16);
942		} else {
943			buf[60] = 0xffff;
944			buf[61] = 0x0fff;
945		}
946		buf[63] = 0x7;
947		if (p->xfermode & ATA_WDMA0)
948			buf[63] |= (1 << ((p->xfermode & 7) + 8));
949		buf[64] = 0x3;
950		buf[65] = 120;
951		buf[66] = 120;
952		buf[67] = 120;
953		buf[68] = 120;
954		buf[69] = 0;
955		buf[75] = 31;
956		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
957			   ATA_SUPPORT_NCQ);
958		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
959			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
960		buf[80] = 0x3f0;
961		buf[81] = 0x28;
962		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
963			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
964		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
965			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
966		buf[84] = (1 << 14);
967		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
968			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
969		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
970			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
971		buf[87] = (1 << 14);
972		buf[88] = 0x7f;
973		if (p->xfermode & ATA_UDMA0)
974			buf[88] |= (1 << ((p->xfermode & 7) + 8));
975		buf[100] = sectors;
976		buf[101] = (sectors >> 16);
977		buf[102] = (sectors >> 32);
978		buf[103] = (sectors >> 48);
979		if (candelete && !ro) {
980			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
981			buf[105] = 1;
982			buf[169] = ATA_SUPPORT_DSM_TRIM;
983		}
984		buf[106] = 0x4000;
985		buf[209] = 0x4000;
986		if (psectsz > sectsz) {
987			buf[106] |= 0x2000;
988			buf[106] |= ffsl(psectsz / sectsz) - 1;
989			buf[209] |= (psectoff / sectsz);
990		}
991		if (sectsz > 512) {
992			buf[106] |= 0x1000;
993			buf[117] = sectsz / 2;
994			buf[118] = ((sectsz / 2) >> 16);
995		}
996		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
997		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
998		buf[222] = 0x1020;
999		buf[255] = 0x00a5;
1000		ahci_checksum((uint8_t *)buf, sizeof(buf));
1001		ahci_write_fis_piosetup(p);
1002		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1003		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1004	}
1005}
1006
1007static void
1008handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1009{
1010	if (!p->atapi) {
1011		ahci_write_fis_d2h(p, slot, cfis,
1012		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1013	} else {
1014		uint16_t buf[256];
1015
1016		memset(buf, 0, sizeof(buf));
1017		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
1018		ata_string((uint8_t *)(buf+10), p->ident, 20);
1019		ata_string((uint8_t *)(buf+23), "001", 8);
1020		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
1021		buf[49] = (1 << 9 | 1 << 8);
1022		buf[50] = (1 << 14 | 1);
1023		buf[53] = (1 << 2 | 1 << 1);
1024		buf[62] = 0x3f;
1025		buf[63] = 7;
1026		if (p->xfermode & ATA_WDMA0)
1027			buf[63] |= (1 << ((p->xfermode & 7) + 8));
1028		buf[64] = 3;
1029		buf[65] = 120;
1030		buf[66] = 120;
1031		buf[67] = 120;
1032		buf[68] = 120;
1033		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1034		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1035		buf[78] = (1 << 5);
1036		buf[80] = 0x3f0;
1037		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1038			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1039		buf[83] = (1 << 14);
1040		buf[84] = (1 << 14);
1041		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1042			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1043		buf[87] = (1 << 14);
1044		buf[88] = 0x7f;
1045		if (p->xfermode & ATA_UDMA0)
1046			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1047		buf[222] = 0x1020;
1048		buf[255] = 0x00a5;
1049		ahci_checksum((uint8_t *)buf, sizeof(buf));
1050		ahci_write_fis_piosetup(p);
1051		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1052		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1053	}
1054}
1055
1056static void
1057atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1058{
1059	uint8_t buf[36];
1060	uint8_t *acmd;
1061	int len;
1062	uint32_t tfd;
1063
1064	acmd = cfis + 0x40;
1065
1066	if (acmd[1] & 1) {		/* VPD */
1067		if (acmd[2] == 0) {	/* Supported VPD pages */
1068			buf[0] = 0x05;
1069			buf[1] = 0;
1070			buf[2] = 0;
1071			buf[3] = 1;
1072			buf[4] = 0;
1073			len = 4 + buf[3];
1074		} else {
1075			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1076			p->asc = 0x24;
1077			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1078			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1079			ahci_write_fis_d2h(p, slot, cfis, tfd);
1080			return;
1081		}
1082	} else {
1083		buf[0] = 0x05;
1084		buf[1] = 0x80;
1085		buf[2] = 0x00;
1086		buf[3] = 0x21;
1087		buf[4] = 31;
1088		buf[5] = 0;
1089		buf[6] = 0;
1090		buf[7] = 0;
1091		atapi_string(buf + 8, "BHYVE", 8);
1092		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1093		atapi_string(buf + 32, "001", 4);
1094		len = sizeof(buf);
1095	}
1096
1097	if (len > acmd[4])
1098		len = acmd[4];
1099	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1100	write_prdt(p, slot, cfis, buf, len);
1101	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1102}
1103
1104static void
1105atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1106{
1107	uint8_t buf[8];
1108	uint64_t sectors;
1109
1110	sectors = blockif_size(p->bctx) / 2048;
1111	be32enc(buf, sectors - 1);
1112	be32enc(buf + 4, 2048);
1113	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1114	write_prdt(p, slot, cfis, buf, sizeof(buf));
1115	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1116}
1117
1118static void
1119atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1120{
1121	uint8_t *acmd;
1122	uint8_t format;
1123	int len;
1124
1125	acmd = cfis + 0x40;
1126
1127	len = be16dec(acmd + 7);
1128	format = acmd[9] >> 6;
1129	switch (format) {
1130	case 0:
1131	{
1132		int msf, size;
1133		uint64_t sectors;
1134		uint8_t start_track, buf[20], *bp;
1135
1136		msf = (acmd[1] >> 1) & 1;
1137		start_track = acmd[6];
1138		if (start_track > 1 && start_track != 0xaa) {
1139			uint32_t tfd;
1140			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1141			p->asc = 0x24;
1142			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1143			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1144			ahci_write_fis_d2h(p, slot, cfis, tfd);
1145			return;
1146		}
1147		bp = buf + 2;
1148		*bp++ = 1;
1149		*bp++ = 1;
1150		if (start_track <= 1) {
1151			*bp++ = 0;
1152			*bp++ = 0x14;
1153			*bp++ = 1;
1154			*bp++ = 0;
1155			if (msf) {
1156				*bp++ = 0;
1157				lba_to_msf(bp, 0);
1158				bp += 3;
1159			} else {
1160				*bp++ = 0;
1161				*bp++ = 0;
1162				*bp++ = 0;
1163				*bp++ = 0;
1164			}
1165		}
1166		*bp++ = 0;
1167		*bp++ = 0x14;
1168		*bp++ = 0xaa;
1169		*bp++ = 0;
1170		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1171		sectors >>= 2;
1172		if (msf) {
1173			*bp++ = 0;
1174			lba_to_msf(bp, sectors);
1175			bp += 3;
1176		} else {
1177			be32enc(bp, sectors);
1178			bp += 4;
1179		}
1180		size = bp - buf;
1181		be16enc(buf, size - 2);
1182		if (len > size)
1183			len = size;
1184		write_prdt(p, slot, cfis, buf, len);
1185		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1186		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1187		break;
1188	}
1189	case 1:
1190	{
1191		uint8_t buf[12];
1192
1193		memset(buf, 0, sizeof(buf));
1194		buf[1] = 0xa;
1195		buf[2] = 0x1;
1196		buf[3] = 0x1;
1197		if (len > sizeof(buf))
1198			len = sizeof(buf);
1199		write_prdt(p, slot, cfis, buf, len);
1200		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1201		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1202		break;
1203	}
1204	case 2:
1205	{
1206		int msf, size;
1207		uint64_t sectors;
1208		uint8_t *bp, buf[50];
1209
1210		msf = (acmd[1] >> 1) & 1;
1211		bp = buf + 2;
1212		*bp++ = 1;
1213		*bp++ = 1;
1214
1215		*bp++ = 1;
1216		*bp++ = 0x14;
1217		*bp++ = 0;
1218		*bp++ = 0xa0;
1219		*bp++ = 0;
1220		*bp++ = 0;
1221		*bp++ = 0;
1222		*bp++ = 0;
1223		*bp++ = 1;
1224		*bp++ = 0;
1225		*bp++ = 0;
1226
1227		*bp++ = 1;
1228		*bp++ = 0x14;
1229		*bp++ = 0;
1230		*bp++ = 0xa1;
1231		*bp++ = 0;
1232		*bp++ = 0;
1233		*bp++ = 0;
1234		*bp++ = 0;
1235		*bp++ = 1;
1236		*bp++ = 0;
1237		*bp++ = 0;
1238
1239		*bp++ = 1;
1240		*bp++ = 0x14;
1241		*bp++ = 0;
1242		*bp++ = 0xa2;
1243		*bp++ = 0;
1244		*bp++ = 0;
1245		*bp++ = 0;
1246		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1247		sectors >>= 2;
1248		if (msf) {
1249			*bp++ = 0;
1250			lba_to_msf(bp, sectors);
1251			bp += 3;
1252		} else {
1253			be32enc(bp, sectors);
1254			bp += 4;
1255		}
1256
1257		*bp++ = 1;
1258		*bp++ = 0x14;
1259		*bp++ = 0;
1260		*bp++ = 1;
1261		*bp++ = 0;
1262		*bp++ = 0;
1263		*bp++ = 0;
1264		if (msf) {
1265			*bp++ = 0;
1266			lba_to_msf(bp, 0);
1267			bp += 3;
1268		} else {
1269			*bp++ = 0;
1270			*bp++ = 0;
1271			*bp++ = 0;
1272			*bp++ = 0;
1273		}
1274
1275		size = bp - buf;
1276		be16enc(buf, size - 2);
1277		if (len > size)
1278			len = size;
1279		write_prdt(p, slot, cfis, buf, len);
1280		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1281		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1282		break;
1283	}
1284	default:
1285	{
1286		uint32_t tfd;
1287
1288		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1289		p->asc = 0x24;
1290		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1291		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1292		ahci_write_fis_d2h(p, slot, cfis, tfd);
1293		break;
1294	}
1295	}
1296}
1297
1298static void
1299atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1300{
1301	uint8_t buf[16];
1302
1303	memset(buf, 0, sizeof(buf));
1304	buf[3] = 8;
1305
1306	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1307	write_prdt(p, slot, cfis, buf, sizeof(buf));
1308	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1309}
1310
1311static void
1312atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1313{
1314	struct ahci_ioreq *aior;
1315	struct ahci_cmd_hdr *hdr;
1316	struct ahci_prdt_entry *prdt;
1317	struct blockif_req *breq;
1318	uint8_t *acmd;
1319	uint64_t lba;
1320	uint32_t len;
1321	int err;
1322
1323	acmd = cfis + 0x40;
1324	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1325	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1326
1327	lba = be32dec(acmd + 2);
1328	if (acmd[0] == READ_10)
1329		len = be16dec(acmd + 7);
1330	else
1331		len = be32dec(acmd + 6);
1332	if (len == 0) {
1333		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1334		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1335	}
1336	lba *= 2048;
1337	len *= 2048;
1338
1339	/*
1340	 * Pull request off free list
1341	 */
1342	aior = STAILQ_FIRST(&p->iofhd);
1343	assert(aior != NULL);
1344	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1345	aior->cfis = cfis;
1346	aior->slot = slot;
1347	aior->len = len;
1348	aior->done = done;
1349	breq = &aior->io_req;
1350	breq->br_offset = lba + done;
1351	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1352
1353	/* Mark this command in-flight. */
1354	p->pending |= 1 << slot;
1355
1356	/* Stuff request onto busy list. */
1357	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1358
1359	err = blockif_read(p->bctx, breq);
1360	assert(err == 0);
1361}
1362
1363static void
1364atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1365{
1366	uint8_t buf[64];
1367	uint8_t *acmd;
1368	int len;
1369
1370	acmd = cfis + 0x40;
1371	len = acmd[4];
1372	if (len > sizeof(buf))
1373		len = sizeof(buf);
1374	memset(buf, 0, len);
1375	buf[0] = 0x70 | (1 << 7);
1376	buf[2] = p->sense_key;
1377	buf[7] = 10;
1378	buf[12] = p->asc;
1379	write_prdt(p, slot, cfis, buf, len);
1380	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1381	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1382}
1383
1384static void
1385atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1386{
1387	uint8_t *acmd = cfis + 0x40;
1388	uint32_t tfd;
1389
1390	switch (acmd[4] & 3) {
1391	case 0:
1392	case 1:
1393	case 3:
1394		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1395		tfd = ATA_S_READY | ATA_S_DSC;
1396		break;
1397	case 2:
1398		/* TODO eject media */
1399		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1400		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1401		p->asc = 0x53;
1402		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1403		break;
1404	}
1405	ahci_write_fis_d2h(p, slot, cfis, tfd);
1406}
1407
1408static void
1409atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1410{
1411	uint8_t *acmd;
1412	uint32_t tfd;
1413	uint8_t pc, code;
1414	int len;
1415
1416	acmd = cfis + 0x40;
1417	len = be16dec(acmd + 7);
1418	pc = acmd[2] >> 6;
1419	code = acmd[2] & 0x3f;
1420
1421	switch (pc) {
1422	case 0:
1423		switch (code) {
1424		case MODEPAGE_RW_ERROR_RECOVERY:
1425		{
1426			uint8_t buf[16];
1427
1428			if (len > sizeof(buf))
1429				len = sizeof(buf);
1430
1431			memset(buf, 0, sizeof(buf));
1432			be16enc(buf, 16 - 2);
1433			buf[2] = 0x70;
1434			buf[8] = 0x01;
1435			buf[9] = 16 - 10;
1436			buf[11] = 0x05;
1437			write_prdt(p, slot, cfis, buf, len);
1438			tfd = ATA_S_READY | ATA_S_DSC;
1439			break;
1440		}
1441		case MODEPAGE_CD_CAPABILITIES:
1442		{
1443			uint8_t buf[30];
1444
1445			if (len > sizeof(buf))
1446				len = sizeof(buf);
1447
1448			memset(buf, 0, sizeof(buf));
1449			be16enc(buf, 30 - 2);
1450			buf[2] = 0x70;
1451			buf[8] = 0x2A;
1452			buf[9] = 30 - 10;
1453			buf[10] = 0x08;
1454			buf[12] = 0x71;
1455			be16enc(&buf[18], 2);
1456			be16enc(&buf[20], 512);
1457			write_prdt(p, slot, cfis, buf, len);
1458			tfd = ATA_S_READY | ATA_S_DSC;
1459			break;
1460		}
1461		default:
1462			goto error;
1463			break;
1464		}
1465		break;
1466	case 3:
1467		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1468		p->asc = 0x39;
1469		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1470		break;
1471error:
1472	case 1:
1473	case 2:
1474		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1475		p->asc = 0x24;
1476		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1477		break;
1478	}
1479	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1480	ahci_write_fis_d2h(p, slot, cfis, tfd);
1481}
1482
1483static void
1484atapi_get_event_status_notification(struct ahci_port *p, int slot,
1485    uint8_t *cfis)
1486{
1487	uint8_t *acmd;
1488	uint32_t tfd;
1489
1490	acmd = cfis + 0x40;
1491
1492	/* we don't support asynchronous operation */
1493	if (!(acmd[1] & 1)) {
1494		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1495		p->asc = 0x24;
1496		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1497	} else {
1498		uint8_t buf[8];
1499		int len;
1500
1501		len = be16dec(acmd + 7);
1502		if (len > sizeof(buf))
1503			len = sizeof(buf);
1504
1505		memset(buf, 0, sizeof(buf));
1506		be16enc(buf, 8 - 2);
1507		buf[2] = 0x04;
1508		buf[3] = 0x10;
1509		buf[5] = 0x02;
1510		write_prdt(p, slot, cfis, buf, len);
1511		tfd = ATA_S_READY | ATA_S_DSC;
1512	}
1513	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1514	ahci_write_fis_d2h(p, slot, cfis, tfd);
1515}
1516
1517static void
1518handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1519{
1520	uint8_t *acmd;
1521
1522	acmd = cfis + 0x40;
1523
1524#ifdef AHCI_DEBUG
1525	{
1526		int i;
1527		DPRINTF("ACMD:");
1528		for (i = 0; i < 16; i++)
1529			DPRINTF("%02x ", acmd[i]);
1530		DPRINTF("\n");
1531	}
1532#endif
1533
1534	switch (acmd[0]) {
1535	case TEST_UNIT_READY:
1536		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1537		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1538		break;
1539	case INQUIRY:
1540		atapi_inquiry(p, slot, cfis);
1541		break;
1542	case READ_CAPACITY:
1543		atapi_read_capacity(p, slot, cfis);
1544		break;
1545	case PREVENT_ALLOW:
1546		/* TODO */
1547		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1548		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1549		break;
1550	case READ_TOC:
1551		atapi_read_toc(p, slot, cfis);
1552		break;
1553	case REPORT_LUNS:
1554		atapi_report_luns(p, slot, cfis);
1555		break;
1556	case READ_10:
1557	case READ_12:
1558		atapi_read(p, slot, cfis, 0);
1559		break;
1560	case REQUEST_SENSE:
1561		atapi_request_sense(p, slot, cfis);
1562		break;
1563	case START_STOP_UNIT:
1564		atapi_start_stop_unit(p, slot, cfis);
1565		break;
1566	case MODE_SENSE_10:
1567		atapi_mode_sense(p, slot, cfis);
1568		break;
1569	case GET_EVENT_STATUS_NOTIFICATION:
1570		atapi_get_event_status_notification(p, slot, cfis);
1571		break;
1572	default:
1573		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1574		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1575		p->asc = 0x20;
1576		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1577				ATA_S_READY | ATA_S_ERROR);
1578		break;
1579	}
1580}
1581
1582static void
1583ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1584{
1585
1586	p->tfd |= ATA_S_BUSY;
1587	switch (cfis[2]) {
1588	case ATA_ATA_IDENTIFY:
1589		handle_identify(p, slot, cfis);
1590		break;
1591	case ATA_SETFEATURES:
1592	{
1593		switch (cfis[3]) {
1594		case ATA_SF_ENAB_SATA_SF:
1595			switch (cfis[12]) {
1596			case ATA_SATA_SF_AN:
1597				p->tfd = ATA_S_DSC | ATA_S_READY;
1598				break;
1599			default:
1600				p->tfd = ATA_S_ERROR | ATA_S_READY;
1601				p->tfd |= (ATA_ERROR_ABORT << 8);
1602				break;
1603			}
1604			break;
1605		case ATA_SF_ENAB_WCACHE:
1606		case ATA_SF_DIS_WCACHE:
1607		case ATA_SF_ENAB_RCACHE:
1608		case ATA_SF_DIS_RCACHE:
1609			p->tfd = ATA_S_DSC | ATA_S_READY;
1610			break;
1611		case ATA_SF_SETXFER:
1612		{
1613			switch (cfis[12] & 0xf8) {
1614			case ATA_PIO:
1615			case ATA_PIO0:
1616				break;
1617			case ATA_WDMA0:
1618			case ATA_UDMA0:
1619				p->xfermode = (cfis[12] & 0x7);
1620				break;
1621			}
1622			p->tfd = ATA_S_DSC | ATA_S_READY;
1623			break;
1624		}
1625		default:
1626			p->tfd = ATA_S_ERROR | ATA_S_READY;
1627			p->tfd |= (ATA_ERROR_ABORT << 8);
1628			break;
1629		}
1630		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1631		break;
1632	}
1633	case ATA_SET_MULTI:
1634		if (cfis[12] != 0 &&
1635			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1636			p->tfd = ATA_S_ERROR | ATA_S_READY;
1637			p->tfd |= (ATA_ERROR_ABORT << 8);
1638		} else {
1639			p->mult_sectors = cfis[12];
1640			p->tfd = ATA_S_DSC | ATA_S_READY;
1641		}
1642		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1643		break;
1644	case ATA_READ:
1645	case ATA_WRITE:
1646	case ATA_READ48:
1647	case ATA_WRITE48:
1648	case ATA_READ_MUL:
1649	case ATA_WRITE_MUL:
1650	case ATA_READ_MUL48:
1651	case ATA_WRITE_MUL48:
1652	case ATA_READ_DMA:
1653	case ATA_WRITE_DMA:
1654	case ATA_READ_DMA48:
1655	case ATA_WRITE_DMA48:
1656	case ATA_READ_FPDMA_QUEUED:
1657	case ATA_WRITE_FPDMA_QUEUED:
1658		ahci_handle_rw(p, slot, cfis, 0);
1659		break;
1660	case ATA_FLUSHCACHE:
1661	case ATA_FLUSHCACHE48:
1662		ahci_handle_flush(p, slot, cfis);
1663		break;
1664	case ATA_DATA_SET_MANAGEMENT:
1665		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1666		    cfis[13] == 0 && cfis[12] == 1) {
1667			ahci_handle_dsm_trim(p, slot, cfis, 0);
1668			break;
1669		}
1670		ahci_write_fis_d2h(p, slot, cfis,
1671		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1672		break;
1673	case ATA_SEND_FPDMA_QUEUED:
1674		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1675		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1676		    cfis[11] == 0 && cfis[3] == 1) {
1677			ahci_handle_dsm_trim(p, slot, cfis, 0);
1678			break;
1679		}
1680		ahci_write_fis_d2h(p, slot, cfis,
1681		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1682		break;
1683	case ATA_READ_LOG_EXT:
1684	case ATA_READ_LOG_DMA_EXT:
1685		ahci_handle_read_log(p, slot, cfis);
1686		break;
1687	case ATA_SECURITY_FREEZE_LOCK:
1688	case ATA_SMART_CMD:
1689	case ATA_NOP:
1690		ahci_write_fis_d2h(p, slot, cfis,
1691		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1692		break;
1693	case ATA_CHECK_POWER_MODE:
1694		cfis[12] = 0xff;	/* always on */
1695		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1696		break;
1697	case ATA_STANDBY_CMD:
1698	case ATA_STANDBY_IMMEDIATE:
1699	case ATA_IDLE_CMD:
1700	case ATA_IDLE_IMMEDIATE:
1701	case ATA_SLEEP:
1702	case ATA_READ_VERIFY:
1703	case ATA_READ_VERIFY48:
1704		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1705		break;
1706	case ATA_ATAPI_IDENTIFY:
1707		handle_atapi_identify(p, slot, cfis);
1708		break;
1709	case ATA_PACKET_CMD:
1710		if (!p->atapi) {
1711			ahci_write_fis_d2h(p, slot, cfis,
1712			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1713		} else
1714			handle_packet_cmd(p, slot, cfis);
1715		break;
1716	default:
1717		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1718		ahci_write_fis_d2h(p, slot, cfis,
1719		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1720		break;
1721	}
1722}
1723
1724static void
1725ahci_handle_slot(struct ahci_port *p, int slot)
1726{
1727	struct ahci_cmd_hdr *hdr;
1728#ifdef AHCI_DEBUG
1729	struct ahci_prdt_entry *prdt;
1730#endif
1731	struct pci_ahci_softc *sc;
1732	uint8_t *cfis;
1733#ifdef AHCI_DEBUG
1734	int cfl;
1735#endif
1736
1737	sc = p->pr_sc;
1738	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1739#ifdef AHCI_DEBUG
1740	cfl = (hdr->flags & 0x1f) * 4;
1741#endif
1742	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1743			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1744#ifdef AHCI_DEBUG
1745	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1746
1747	DPRINTF("\ncfis:");
1748	for (i = 0; i < cfl; i++) {
1749		if (i % 10 == 0)
1750			DPRINTF("\n");
1751		DPRINTF("%02x ", cfis[i]);
1752	}
1753	DPRINTF("\n");
1754
1755	for (i = 0; i < hdr->prdtl; i++) {
1756		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1757		prdt++;
1758	}
1759#endif
1760
1761	if (cfis[0] != FIS_TYPE_REGH2D) {
1762		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1763		return;
1764	}
1765
1766	if (cfis[1] & 0x80) {
1767		ahci_handle_cmd(p, slot, cfis);
1768	} else {
1769		if (cfis[15] & (1 << 2))
1770			p->reset = 1;
1771		else if (p->reset) {
1772			p->reset = 0;
1773			ahci_port_reset(p);
1774		}
1775		p->ci &= ~(1 << slot);
1776	}
1777}
1778
1779static void
1780ahci_handle_port(struct ahci_port *p)
1781{
1782
1783	if (!(p->cmd & AHCI_P_CMD_ST))
1784		return;
1785
1786	/*
1787	 * Search for any new commands to issue ignoring those that
1788	 * are already in-flight.  Stop if device is busy or in error.
1789	 */
1790	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1791		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1792			break;
1793		if (p->waitforclear)
1794			break;
1795		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1796			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1797			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1798			ahci_handle_slot(p, p->ccs);
1799		}
1800	}
1801}
1802
1803/*
1804 * blockif callback routine - this runs in the context of the blockif
1805 * i/o thread, so the mutex needs to be acquired.
1806 */
1807static void
1808ata_ioreq_cb(struct blockif_req *br, int err)
1809{
1810	struct ahci_cmd_hdr *hdr;
1811	struct ahci_ioreq *aior;
1812	struct ahci_port *p;
1813	struct pci_ahci_softc *sc;
1814	uint32_t tfd;
1815	uint8_t *cfis;
1816	int slot, ncq, dsm;
1817
1818	DPRINTF("%s %d\n", __func__, err);
1819
1820	ncq = dsm = 0;
1821	aior = br->br_param;
1822	p = aior->io_pr;
1823	cfis = aior->cfis;
1824	slot = aior->slot;
1825	sc = p->pr_sc;
1826	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1827
1828	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1829	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1830	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1831		ncq = 1;
1832	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1833	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1834	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1835		dsm = 1;
1836
1837	pthread_mutex_lock(&sc->mtx);
1838
1839	/*
1840	 * Delete the blockif request from the busy list
1841	 */
1842	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1843
1844	/*
1845	 * Move the blockif request back to the free list
1846	 */
1847	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1848
1849	if (!err)
1850		hdr->prdbc = aior->done;
1851
1852	if (!err && aior->more) {
1853		if (dsm)
1854			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1855		else
1856			ahci_handle_rw(p, slot, cfis, aior->done);
1857		goto out;
1858	}
1859
1860	if (!err)
1861		tfd = ATA_S_READY | ATA_S_DSC;
1862	else
1863		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1864	if (ncq)
1865		ahci_write_fis_sdb(p, slot, cfis, tfd);
1866	else
1867		ahci_write_fis_d2h(p, slot, cfis, tfd);
1868
1869	/*
1870	 * This command is now complete.
1871	 */
1872	p->pending &= ~(1 << slot);
1873
1874	ahci_check_stopped(p);
1875	ahci_handle_port(p);
1876out:
1877	pthread_mutex_unlock(&sc->mtx);
1878	DPRINTF("%s exit\n", __func__);
1879}
1880
1881static void
1882atapi_ioreq_cb(struct blockif_req *br, int err)
1883{
1884	struct ahci_cmd_hdr *hdr;
1885	struct ahci_ioreq *aior;
1886	struct ahci_port *p;
1887	struct pci_ahci_softc *sc;
1888	uint8_t *cfis;
1889	uint32_t tfd;
1890	int slot;
1891
1892	DPRINTF("%s %d\n", __func__, err);
1893
1894	aior = br->br_param;
1895	p = aior->io_pr;
1896	cfis = aior->cfis;
1897	slot = aior->slot;
1898	sc = p->pr_sc;
1899	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1900
1901	pthread_mutex_lock(&sc->mtx);
1902
1903	/*
1904	 * Delete the blockif request from the busy list
1905	 */
1906	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1907
1908	/*
1909	 * Move the blockif request back to the free list
1910	 */
1911	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1912
1913	if (!err)
1914		hdr->prdbc = aior->done;
1915
1916	if (!err && aior->more) {
1917		atapi_read(p, slot, cfis, aior->done);
1918		goto out;
1919	}
1920
1921	if (!err) {
1922		tfd = ATA_S_READY | ATA_S_DSC;
1923	} else {
1924		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1925		p->asc = 0x21;
1926		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1927	}
1928	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1929	ahci_write_fis_d2h(p, slot, cfis, tfd);
1930
1931	/*
1932	 * This command is now complete.
1933	 */
1934	p->pending &= ~(1 << slot);
1935
1936	ahci_check_stopped(p);
1937	ahci_handle_port(p);
1938out:
1939	pthread_mutex_unlock(&sc->mtx);
1940	DPRINTF("%s exit\n", __func__);
1941}
1942
1943static void
1944pci_ahci_ioreq_init(struct ahci_port *pr)
1945{
1946	struct ahci_ioreq *vr;
1947	int i;
1948
1949	pr->ioqsz = blockif_queuesz(pr->bctx);
1950	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1951	STAILQ_INIT(&pr->iofhd);
1952
1953	/*
1954	 * Add all i/o request entries to the free queue
1955	 */
1956	for (i = 0; i < pr->ioqsz; i++) {
1957		vr = &pr->ioreq[i];
1958		vr->io_pr = pr;
1959		if (!pr->atapi)
1960			vr->io_req.br_callback = ata_ioreq_cb;
1961		else
1962			vr->io_req.br_callback = atapi_ioreq_cb;
1963		vr->io_req.br_param = vr;
1964		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1965	}
1966
1967	TAILQ_INIT(&pr->iobhd);
1968}
1969
1970static void
1971pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1972{
1973	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1974	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1975	struct ahci_port *p = &sc->port[port];
1976
1977	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1978		port, offset, value);
1979
1980	switch (offset) {
1981	case AHCI_P_CLB:
1982		p->clb = value;
1983		break;
1984	case AHCI_P_CLBU:
1985		p->clbu = value;
1986		break;
1987	case AHCI_P_FB:
1988		p->fb = value;
1989		break;
1990	case AHCI_P_FBU:
1991		p->fbu = value;
1992		break;
1993	case AHCI_P_IS:
1994		p->is &= ~value;
1995		break;
1996	case AHCI_P_IE:
1997		p->ie = value & 0xFDC000FF;
1998		ahci_generate_intr(sc);
1999		break;
2000	case AHCI_P_CMD:
2001	{
2002		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2003		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2004		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2005		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2006		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2007		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2008		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2009		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2010
2011		if (!(value & AHCI_P_CMD_ST)) {
2012			ahci_port_stop(p);
2013		} else {
2014			uint64_t clb;
2015
2016			p->cmd |= AHCI_P_CMD_CR;
2017			clb = (uint64_t)p->clbu << 32 | p->clb;
2018			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2019					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2020		}
2021
2022		if (value & AHCI_P_CMD_FRE) {
2023			uint64_t fb;
2024
2025			p->cmd |= AHCI_P_CMD_FR;
2026			fb = (uint64_t)p->fbu << 32 | p->fb;
2027			/* we don't support FBSCP, so rfis size is 256Bytes */
2028			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2029		} else {
2030			p->cmd &= ~AHCI_P_CMD_FR;
2031		}
2032
2033		if (value & AHCI_P_CMD_CLO) {
2034			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2035			p->cmd &= ~AHCI_P_CMD_CLO;
2036		}
2037
2038		if (value & AHCI_P_CMD_ICC_MASK) {
2039			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2040		}
2041
2042		ahci_handle_port(p);
2043		break;
2044	}
2045	case AHCI_P_TFD:
2046	case AHCI_P_SIG:
2047	case AHCI_P_SSTS:
2048		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
2049		break;
2050	case AHCI_P_SCTL:
2051		p->sctl = value;
2052		if (!(p->cmd & AHCI_P_CMD_ST)) {
2053			if (value & ATA_SC_DET_RESET)
2054				ahci_port_reset(p);
2055		}
2056		break;
2057	case AHCI_P_SERR:
2058		p->serr &= ~value;
2059		break;
2060	case AHCI_P_SACT:
2061		p->sact |= value;
2062		break;
2063	case AHCI_P_CI:
2064		p->ci |= value;
2065		ahci_handle_port(p);
2066		break;
2067	case AHCI_P_SNTF:
2068	case AHCI_P_FBS:
2069	default:
2070		break;
2071	}
2072}
2073
2074static void
2075pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2076{
2077	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2078		offset, value);
2079
2080	switch (offset) {
2081	case AHCI_CAP:
2082	case AHCI_PI:
2083	case AHCI_VS:
2084	case AHCI_CAP2:
2085		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2086		break;
2087	case AHCI_GHC:
2088		if (value & AHCI_GHC_HR)
2089			ahci_reset(sc);
2090		else if (value & AHCI_GHC_IE) {
2091			sc->ghc |= AHCI_GHC_IE;
2092			ahci_generate_intr(sc);
2093		}
2094		break;
2095	case AHCI_IS:
2096		sc->is &= ~value;
2097		ahci_generate_intr(sc);
2098		break;
2099	default:
2100		break;
2101	}
2102}
2103
2104static void
2105pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2106		int baridx, uint64_t offset, int size, uint64_t value)
2107{
2108	struct pci_ahci_softc *sc = pi->pi_arg;
2109
2110	assert(baridx == 5);
2111	assert((offset % 4) == 0 && size == 4);
2112
2113	pthread_mutex_lock(&sc->mtx);
2114
2115	if (offset < AHCI_OFFSET)
2116		pci_ahci_host_write(sc, offset, value);
2117	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2118		pci_ahci_port_write(sc, offset, value);
2119	else
2120		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2121
2122	pthread_mutex_unlock(&sc->mtx);
2123}
2124
2125static uint64_t
2126pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2127{
2128	uint32_t value;
2129
2130	switch (offset) {
2131	case AHCI_CAP:
2132	case AHCI_GHC:
2133	case AHCI_IS:
2134	case AHCI_PI:
2135	case AHCI_VS:
2136	case AHCI_CCCC:
2137	case AHCI_CCCP:
2138	case AHCI_EM_LOC:
2139	case AHCI_EM_CTL:
2140	case AHCI_CAP2:
2141	{
2142		uint32_t *p = &sc->cap;
2143		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2144		value = *p;
2145		break;
2146	}
2147	default:
2148		value = 0;
2149		break;
2150	}
2151	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2152		offset, value);
2153
2154	return (value);
2155}
2156
2157static uint64_t
2158pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2159{
2160	uint32_t value;
2161	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2162	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2163
2164	switch (offset) {
2165	case AHCI_P_CLB:
2166	case AHCI_P_CLBU:
2167	case AHCI_P_FB:
2168	case AHCI_P_FBU:
2169	case AHCI_P_IS:
2170	case AHCI_P_IE:
2171	case AHCI_P_CMD:
2172	case AHCI_P_TFD:
2173	case AHCI_P_SIG:
2174	case AHCI_P_SSTS:
2175	case AHCI_P_SCTL:
2176	case AHCI_P_SERR:
2177	case AHCI_P_SACT:
2178	case AHCI_P_CI:
2179	case AHCI_P_SNTF:
2180	case AHCI_P_FBS:
2181	{
2182		uint32_t *p= &sc->port[port].clb;
2183		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2184		value = *p;
2185		break;
2186	}
2187	default:
2188		value = 0;
2189		break;
2190	}
2191
2192	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2193		port, offset, value);
2194
2195	return value;
2196}
2197
2198static uint64_t
2199pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2200    uint64_t regoff, int size)
2201{
2202	struct pci_ahci_softc *sc = pi->pi_arg;
2203	uint64_t offset;
2204	uint32_t value;
2205
2206	assert(baridx == 5);
2207	assert(size == 1 || size == 2 || size == 4);
2208	assert((regoff & (size - 1)) == 0);
2209
2210	pthread_mutex_lock(&sc->mtx);
2211
2212	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2213	if (offset < AHCI_OFFSET)
2214		value = pci_ahci_host_read(sc, offset);
2215	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2216		value = pci_ahci_port_read(sc, offset);
2217	else {
2218		value = 0;
2219		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
2220		    regoff);
2221	}
2222	value >>= 8 * (regoff & 0x3);
2223
2224	pthread_mutex_unlock(&sc->mtx);
2225
2226	return (value);
2227}
2228
2229static int
2230pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2231{
2232	char bident[sizeof("XX:X:X")];
2233	struct blockif_ctxt *bctxt;
2234	struct pci_ahci_softc *sc;
2235	int ret, slots;
2236	MD5_CTX mdctx;
2237	u_char digest[16];
2238
2239	ret = 0;
2240
2241	if (opts == NULL) {
2242		fprintf(stderr, "pci_ahci: backing device required\n");
2243		return (1);
2244	}
2245
2246#ifdef AHCI_DEBUG
2247	dbg = fopen("/tmp/log", "w+");
2248#endif
2249
2250	sc = calloc(1, sizeof(struct pci_ahci_softc));
2251	pi->pi_arg = sc;
2252	sc->asc_pi = pi;
2253	sc->ports = MAX_PORTS;
2254
2255	/*
2256	 * Only use port 0 for a backing device. All other ports will be
2257	 * marked as unused
2258	 */
2259	sc->port[0].atapi = atapi;
2260
2261	/*
2262	 * Attempt to open the backing image. Use the PCI
2263	 * slot/func for the identifier string.
2264	 */
2265	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2266	bctxt = blockif_open(opts, bident);
2267	if (bctxt == NULL) {
2268		ret = 1;
2269		goto open_fail;
2270	}
2271	sc->port[0].bctx = bctxt;
2272	sc->port[0].pr_sc = sc;
2273
2274	/*
2275	 * Create an identifier for the backing file. Use parts of the
2276	 * md5 sum of the filename
2277	 */
2278	MD5Init(&mdctx);
2279	MD5Update(&mdctx, opts, strlen(opts));
2280	MD5Final(digest, &mdctx);
2281	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2282	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2283
2284	/*
2285	 * Allocate blockif request structures and add them
2286	 * to the free list
2287	 */
2288	pci_ahci_ioreq_init(&sc->port[0]);
2289
2290	pthread_mutex_init(&sc->mtx, NULL);
2291
2292	/* Intel ICH8 AHCI */
2293	slots = sc->port[0].ioqsz;
2294	if (slots > 32)
2295		slots = 32;
2296	--slots;
2297	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2298	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2299	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2300	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2301	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2302
2303	/* Only port 0 implemented */
2304	sc->pi = 1;
2305	sc->vs = 0x10300;
2306	sc->cap2 = AHCI_CAP2_APST;
2307	ahci_reset(sc);
2308
2309	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2310	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2311	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2312	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2313	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2314	pci_emul_add_msicap(pi, 1);
2315	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2316	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2317
2318	pci_lintr_request(pi);
2319
2320open_fail:
2321	if (ret) {
2322		if (sc->port[0].bctx != NULL)
2323			blockif_close(sc->port[0].bctx);
2324		free(sc);
2325	}
2326
2327	return (ret);
2328}
2329
/*
 * Init hook for the "ahci-hd" emulation: set the controller up with
 * port 0 as a plain ATA disk (atapi argument 0).
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	int error;

	error = pci_ahci_init(ctx, pi, opts, 0);
	return (error);
}
2336
/*
 * Init hook for the "ahci-cd" emulation: set the controller up with
 * port 0 as an ATAPI device (atapi argument 1).
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	int error;

	error = pci_ahci_init(ctx, pi, opts, 1);
	return (error);
}
2343
2344/*
2345 * Use separate emulation names to distinguish drive and atapi devices
2346 */
2347struct pci_devemu pci_de_ahci_hd = {
2348	.pe_emu =	"ahci-hd",
2349	.pe_init =	pci_ahci_hd_init,
2350	.pe_barwrite =	pci_ahci_write,
2351	.pe_barread =	pci_ahci_read
2352};
2353PCI_EMUL_SET(pci_de_ahci_hd);
2354
/*
 * "ahci-cd": AHCI controller whose port 0 is set up as an ATAPI device by
 * pci_ahci_atapi_init().  BAR accesses go through the shared
 * pci_ahci_write()/pci_ahci_read() handlers.
 */
struct pci_devemu pci_de_ahci_cd = {
	.pe_emu =	"ahci-cd",
	.pe_init =	pci_ahci_atapi_init,
	.pe_barwrite =	pci_ahci_write,
	.pe_barread =	pci_ahci_read
};
PCI_EMUL_SET(pci_de_ahci_cd);
2362