1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5 * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD$
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include <sys/param.h>
36#include <sys/linker_set.h>
37#include <sys/stat.h>
38#include <sys/uio.h>
39#include <sys/ioctl.h>
40#include <sys/disk.h>
41#include <sys/ata.h>
42#include <sys/endian.h>
43
44#include <errno.h>
45#include <fcntl.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <stdint.h>
49#include <string.h>
50#include <strings.h>
51#include <unistd.h>
52#include <assert.h>
53#include <pthread.h>
54#include <pthread_np.h>
55#include <inttypes.h>
56#include <md5.h>
57
58#include "bhyverun.h"
59#include "pci_emul.h"
60#include "ahci.h"
61#include "block_if.h"
62
/*
 * Port-count limits for the emulated controller.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/*
 * Values stored in a port's 'sig' field by ahci_port_reset(), depending on
 * whether the attached device is a disk or an ATAPI device.
 */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
68
/*
 * FIS type codes.  The value is stored in byte 0 of every FIS built in this
 * file and is also used by ahci_write_fis() to pick the destination offset
 * inside the received-FIS area.
 */
enum sata_fis_type {
	/* Register FIS - host to device */
	FIS_TYPE_REGH2D		= 0x27,
	/* Register FIS - device to host */
	FIS_TYPE_REGD2H		= 0x34,
	/* DMA activate FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,
	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,
	/* Data FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,
	/* BIST activate FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,
	/* PIO setup FIS - device to host */
	FIS_TYPE_PIOSETUP	= 0x5F,
	/* Set dev bits FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,
};
79
/*
 * SCSI opcodes (first byte of the ATAPI command packet)
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): these look like SET FEATURES subcommand / SATA feature
 * codes rather than command opcodes -- confirm at the use sites.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
110
/*
 * Debug printf
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it when AHCI_DEBUG is
 * defined, and expands to nothing otherwise.  WPRINTF() always prints to
 * stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)

/*
 * Presumably 20 identifier characters plus a terminator -- confirm at the
 * use site.  The expansion is parenthesized so that the value stays 21 when
 * the macro is used inside a larger expression (AHCI_PORT_IDENT * 2 used to
 * evaluate to 22).
 */
#define AHCI_PORT_IDENT (20 + 1)
123
124struct ahci_ioreq {
125	struct blockif_req io_req;
126	struct ahci_port *io_pr;
127	STAILQ_ENTRY(ahci_ioreq) io_flist;
128	TAILQ_ENTRY(ahci_ioreq) io_blist;
129	uint8_t *cfis;
130	uint32_t len;
131	uint32_t done;
132	int slot;
133	int more;
134};
135
136struct ahci_port {
137	struct blockif_ctxt *bctx;
138	struct pci_ahci_softc *pr_sc;
139	struct ata_params ata_ident;
140	uint8_t *cmd_lst;
141	uint8_t *rfis;
142	int port;
143	int atapi;
144	int reset;
145	int waitforclear;
146	int mult_sectors;
147	uint8_t xfermode;
148	uint8_t err_cfis[20];
149	uint8_t sense_key;
150	uint8_t asc;
151	u_int ccs;
152	uint32_t pending;
153
154	uint32_t clb;
155	uint32_t clbu;
156	uint32_t fb;
157	uint32_t fbu;
158	uint32_t is;
159	uint32_t ie;
160	uint32_t cmd;
161	uint32_t unused0;
162	uint32_t tfd;
163	uint32_t sig;
164	uint32_t ssts;
165	uint32_t sctl;
166	uint32_t serr;
167	uint32_t sact;
168	uint32_t ci;
169	uint32_t sntf;
170	uint32_t fbs;
171
172	/*
173	 * i/o request info
174	 */
175	struct ahci_ioreq *ioreq;
176	int ioqsz;
177	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
178	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
179};
180
/*
 * One command-list entry as laid out in guest memory.  The handlers in this
 * file locate it at cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* command flags */
	uint16_t prdtl;		/* number of PRDT entries that follow the CFIS */
	uint32_t prdbc;		/* byte count transferred (see write_prdt) */
	uint64_t ctba;		/* command table base address */
	uint32_t reserved[4];
};
188
/*
 * One PRDT (scatter/gather) entry.  The table starts 0x80 bytes past the
 * command FIS.  The segment length in bytes is (dbc & DBCMASK) + 1.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the segment */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* byte count minus one, plus flag bits */
};
195
196struct pci_ahci_softc {
197	struct pci_devinst *asc_pi;
198	pthread_mutex_t	mtx;
199	int ports;
200	uint32_t cap;
201	uint32_t ghc;
202	uint32_t is;
203	uint32_t pi;
204	uint32_t vs;
205	uint32_t ccc_ctl;
206	uint32_t ccc_pts;
207	uint32_t em_loc;
208	uint32_t em_ctl;
209	uint32_t cap2;
210	uint32_t bohc;
211	uint32_t lintr;
212	struct ahci_port port[MAX_PORTS];
213};
214#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
215
/*
 * Forward declaration: ahci_handle_dsm_trim() calls this before its
 * definition, which is not in this part of the file.
 */
static void	ahci_handle_port(struct ahci_port *p);
217
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * The address is first biased by 150 frames; 75 frames make one second and
 * 60 seconds one minute.  buf[0], buf[1] and buf[2] receive the minute,
 * second and frame values respectively.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	const int frames = lba + 150;
	const int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
225
226/*
227 * Generate HBA interrupts on global IS register write.
228 */
229static void
230ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
231{
232	struct pci_devinst *pi = sc->asc_pi;
233	struct ahci_port *p;
234	int i, nmsg;
235	uint32_t mmask;
236
237	/* Update global IS from PxIS/PxIE. */
238	for (i = 0; i < sc->ports; i++) {
239		p = &sc->port[i];
240		if (p->is & p->ie)
241			sc->is |= (1 << i);
242	}
243	DPRINTF("%s(%08x) %08x\n", __func__, mask, sc->is);
244
245	/* If there is nothing enabled -- clear legacy interrupt and exit. */
246	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
247		if (sc->lintr) {
248			pci_lintr_deassert(pi);
249			sc->lintr = 0;
250		}
251		return;
252	}
253
254	/* If there is anything and no MSI -- assert legacy interrupt. */
255	nmsg = pci_msi_maxmsgnum(pi);
256	if (nmsg == 0) {
257		if (!sc->lintr) {
258			sc->lintr = 1;
259			pci_lintr_assert(pi);
260		}
261		return;
262	}
263
264	/* Assert respective MSIs for ports that were touched. */
265	for (i = 0; i < nmsg; i++) {
266		if (sc->ports <= nmsg || i < nmsg - 1)
267			mmask = 1 << i;
268		else
269			mmask = 0xffffffff << i;
270		if (sc->is & mask && mmask & mask)
271			pci_generate_msi(pi, i);
272	}
273}
274
275/*
276 * Generate HBA interrupt on specific port event.
277 */
278static void
279ahci_port_intr(struct ahci_port *p)
280{
281	struct pci_ahci_softc *sc = p->pr_sc;
282	struct pci_devinst *pi = sc->asc_pi;
283	int nmsg;
284
285	DPRINTF("%s(%d) %08x/%08x %08x\n", __func__,
286	    p->port, p->is, p->ie, sc->is);
287
288	/* If there is nothing enabled -- we are done. */
289	if ((p->is & p->ie) == 0)
290		return;
291
292	/* In case of non-shared MSI always generate interrupt. */
293	nmsg = pci_msi_maxmsgnum(pi);
294	if (sc->ports <= nmsg || p->port < nmsg - 1) {
295		sc->is |= (1 << p->port);
296		if ((sc->ghc & AHCI_GHC_IE) == 0)
297			return;
298		pci_generate_msi(pi, p->port);
299		return;
300	}
301
302	/* If IS for this port is already set -- do nothing. */
303	if (sc->is & (1 << p->port))
304		return;
305
306	sc->is |= (1 << p->port);
307
308	/* If interrupts are enabled -- generate one. */
309	if ((sc->ghc & AHCI_GHC_IE) == 0)
310		return;
311	if (nmsg > 0) {
312		pci_generate_msi(pi, nmsg - 1);
313	} else if (!sc->lintr) {
314		sc->lintr = 1;
315		pci_lintr_assert(pi);
316	}
317}
318
319static void
320ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
321{
322	int offset, len, irq;
323
324	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
325		return;
326
327	switch (ft) {
328	case FIS_TYPE_REGD2H:
329		offset = 0x40;
330		len = 20;
331		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
332		break;
333	case FIS_TYPE_SETDEVBITS:
334		offset = 0x58;
335		len = 8;
336		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
337		break;
338	case FIS_TYPE_PIOSETUP:
339		offset = 0x20;
340		len = 20;
341		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
342		break;
343	default:
344		WPRINTF("unsupported fis type %d\n", ft);
345		return;
346	}
347	if (fis[2] & ATA_S_ERROR) {
348		p->waitforclear = 1;
349		irq |= AHCI_P_IX_TFE;
350	}
351	memcpy(p->rfis + offset, fis, len);
352	if (irq) {
353		if (~p->is & irq) {
354			p->is |= irq;
355			ahci_port_intr(p);
356		}
357	}
358}
359
360static void
361ahci_write_fis_piosetup(struct ahci_port *p)
362{
363	uint8_t fis[20];
364
365	memset(fis, 0, sizeof(fis));
366	fis[0] = FIS_TYPE_PIOSETUP;
367	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
368}
369
/*
 * Post a set-device-bits FIS for command 'slot'.
 *
 * 'tfd' carries the status in bits 0-7 and the error in bits 8-15; only the
 * status bits selected by mask 0x77 are reported and merged into the port's
 * task file.  On error, the slot, status, error and bytes 4..19 of the
 * command FIS are saved in err_cfis (served later by the NCQ command error
 * log).  On success, the slot's bit is put into the 32-bit field at
 * fis[4..7] and cleared from sact.
 *
 * The 32-bit field is written with le32enc() rather than through a cast
 * pointer: 'fis' is a byte array, so a uint32_t store through it is neither
 * alignment- nor aliasing-safe.  The slot bit is built with an unsigned
 * shift so that slot 31 does not overflow a signed int.
 */
static void
ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
{
	uint8_t fis[8];
	uint8_t error;

	error = (tfd >> 8) & 0xff;
	tfd &= 0x77;
	memset(fis, 0, sizeof(fis));
	fis[0] = FIS_TYPE_SETDEVBITS;
	fis[1] = (1 << 6);		/* request an interrupt */
	fis[2] = tfd;
	fis[3] = error;
	if (fis[2] & ATA_S_ERROR) {
		p->err_cfis[0] = slot;
		p->err_cfis[2] = tfd;
		p->err_cfis[3] = error;
		memcpy(&p->err_cfis[4], cfis + 4, 16);
	} else {
		le32enc(fis + 4, 1U << slot);
		p->sact &= ~(1U << slot);
	}
	p->tfd &= ~0x77;
	p->tfd |= tfd;
	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
}
396
397static void
398ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
399{
400	uint8_t fis[20];
401	uint8_t error;
402
403	error = (tfd >> 8) & 0xff;
404	memset(fis, 0, sizeof(fis));
405	fis[0] = FIS_TYPE_REGD2H;
406	fis[1] = (1 << 6);
407	fis[2] = tfd & 0xff;
408	fis[3] = error;
409	fis[4] = cfis[4];
410	fis[5] = cfis[5];
411	fis[6] = cfis[6];
412	fis[7] = cfis[7];
413	fis[8] = cfis[8];
414	fis[9] = cfis[9];
415	fis[10] = cfis[10];
416	fis[11] = cfis[11];
417	fis[12] = cfis[12];
418	fis[13] = cfis[13];
419	if (fis[2] & ATA_S_ERROR) {
420		p->err_cfis[0] = 0x80;
421		p->err_cfis[2] = tfd & 0xff;
422		p->err_cfis[3] = error;
423		memcpy(&p->err_cfis[4], cfis + 4, 16);
424	} else
425		p->ci &= ~(1 << slot);
426	p->tfd = tfd;
427	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
428}
429
430static void
431ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
432{
433	uint8_t fis[20];
434
435	p->tfd = ATA_S_READY | ATA_S_DSC;
436	memset(fis, 0, sizeof(fis));
437	fis[0] = FIS_TYPE_REGD2H;
438	fis[1] = 0;			/* No interrupt */
439	fis[2] = p->tfd;		/* Status */
440	fis[3] = 0;			/* No error */
441	p->ci &= ~(1 << slot);
442	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
443}
444
445static void
446ahci_write_reset_fis_d2h(struct ahci_port *p)
447{
448	uint8_t fis[20];
449
450	memset(fis, 0, sizeof(fis));
451	fis[0] = FIS_TYPE_REGD2H;
452	fis[3] = 1;
453	fis[4] = 1;
454	if (p->atapi) {
455		fis[5] = 0x14;
456		fis[6] = 0xeb;
457	}
458	fis[12] = 1;
459	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
460}
461
462static void
463ahci_check_stopped(struct ahci_port *p)
464{
465	/*
466	 * If we are no longer processing the command list and nothing
467	 * is in-flight, clear the running bit, the current command
468	 * slot, the command issue and active bits.
469	 */
470	if (!(p->cmd & AHCI_P_CMD_ST)) {
471		if (p->pending == 0) {
472			p->ccs = 0;
473			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
474			p->ci = 0;
475			p->sact = 0;
476			p->waitforclear = 0;
477		}
478	}
479}
480
481static void
482ahci_port_stop(struct ahci_port *p)
483{
484	struct ahci_ioreq *aior;
485	uint8_t *cfis;
486	int slot;
487	int error;
488
489	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
490
491	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
492		/*
493		 * Try to cancel the outstanding blockif request.
494		 */
495		error = blockif_cancel(p->bctx, &aior->io_req);
496		if (error != 0)
497			continue;
498
499		slot = aior->slot;
500		cfis = aior->cfis;
501		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
502		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
503		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
504			p->sact &= ~(1 << slot);	/* NCQ */
505		else
506			p->ci &= ~(1 << slot);
507
508		/*
509		 * This command is now done.
510		 */
511		p->pending &= ~(1 << slot);
512
513		/*
514		 * Delete the blockif request from the busy list
515		 */
516		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
517
518		/*
519		 * Move the blockif request back to the free list
520		 */
521		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
522	}
523
524	ahci_check_stopped(p);
525}
526
527static void
528ahci_port_reset(struct ahci_port *pr)
529{
530	pr->serr = 0;
531	pr->sact = 0;
532	pr->xfermode = ATA_UDMA6;
533	pr->mult_sectors = 128;
534
535	if (!pr->bctx) {
536		pr->ssts = ATA_SS_DET_NO_DEVICE;
537		pr->sig = 0xFFFFFFFF;
538		pr->tfd = 0x7F;
539		return;
540	}
541	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
542	if (pr->sctl & ATA_SC_SPD_MASK)
543		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
544	else
545		pr->ssts |= ATA_SS_SPD_GEN3;
546	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
547	if (!pr->atapi) {
548		pr->sig = PxSIG_ATA;
549		pr->tfd |= ATA_S_READY;
550	} else
551		pr->sig = PxSIG_ATAPI;
552	ahci_write_reset_fis_d2h(pr);
553}
554
555static void
556ahci_reset(struct pci_ahci_softc *sc)
557{
558	int i;
559
560	sc->ghc = AHCI_GHC_AE;
561	sc->is = 0;
562
563	if (sc->lintr) {
564		pci_lintr_deassert(sc->asc_pi);
565		sc->lintr = 0;
566	}
567
568	for (i = 0; i < sc->ports; i++) {
569		sc->port[i].ie = 0;
570		sc->port[i].is = 0;
571		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
572		if (sc->port[i].bctx)
573			sc->port[i].cmd |= AHCI_P_CMD_CPS;
574		sc->port[i].sctl = 0;
575		ahci_port_reset(&sc->port[i]);
576	}
577}
578
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string layout: the two
 * bytes of every pair are swapped (index i is stored at i ^ 1) and the field
 * is padded with spaces once 'src' is exhausted.  No terminator is written.
 * 'len' should be even, otherwise the last byte lands one past the field.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
591
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
 * padding with spaces once 'src' is exhausted.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos] = (*src != '\0') ? *src++ : ' ';
}
604
605/*
606 * Build up the iovec based on the PRDT, 'done' and 'len'.
607 */
608static void
609ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
610    struct ahci_prdt_entry *prdt, uint16_t prdtl)
611{
612	struct blockif_req *breq = &aior->io_req;
613	int i, j, skip, todo, left, extra;
614	uint32_t dbcsz;
615
616	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
617	skip = aior->done;
618	left = aior->len - aior->done;
619	todo = 0;
620	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
621	    i++, prdt++) {
622		dbcsz = (prdt->dbc & DBCMASK) + 1;
623		/* Skip already done part of the PRDT */
624		if (dbcsz <= skip) {
625			skip -= dbcsz;
626			continue;
627		}
628		dbcsz -= skip;
629		if (dbcsz > left)
630			dbcsz = left;
631		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
632		    prdt->dba + skip, dbcsz);
633		breq->br_iov[j].iov_len = dbcsz;
634		todo += dbcsz;
635		left -= dbcsz;
636		skip = 0;
637		j++;
638	}
639
640	/* If we got limited by IOV length, round I/O down to sector size. */
641	if (j == BLOCKIF_IOV_MAX) {
642		extra = todo % blockif_sectsz(p->bctx);
643		todo -= extra;
644		assert(todo > 0);
645		while (extra > 0) {
646			if (breq->br_iov[j - 1].iov_len > extra) {
647				breq->br_iov[j - 1].iov_len -= extra;
648				break;
649			}
650			extra -= breq->br_iov[j - 1].iov_len;
651			j--;
652		}
653	}
654
655	breq->br_iovcnt = j;
656	breq->br_resid = todo;
657	aior->done += todo;
658	aior->more = (aior->done < aior->len && i < prdtl);
659}
660
661static void
662ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
663{
664	struct ahci_ioreq *aior;
665	struct blockif_req *breq;
666	struct ahci_prdt_entry *prdt;
667	struct ahci_cmd_hdr *hdr;
668	uint64_t lba;
669	uint32_t len;
670	int err, first, ncq, readop;
671
672	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
673	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
674	ncq = 0;
675	readop = 1;
676	first = (done == 0);
677
678	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
679	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
680	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
681	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
682		readop = 0;
683
684	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
685	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
686		lba = ((uint64_t)cfis[10] << 40) |
687			((uint64_t)cfis[9] << 32) |
688			((uint64_t)cfis[8] << 24) |
689			((uint64_t)cfis[6] << 16) |
690			((uint64_t)cfis[5] << 8) |
691			cfis[4];
692		len = cfis[11] << 8 | cfis[3];
693		if (!len)
694			len = 65536;
695		ncq = 1;
696	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
697	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
698	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
699		lba = ((uint64_t)cfis[10] << 40) |
700			((uint64_t)cfis[9] << 32) |
701			((uint64_t)cfis[8] << 24) |
702			((uint64_t)cfis[6] << 16) |
703			((uint64_t)cfis[5] << 8) |
704			cfis[4];
705		len = cfis[13] << 8 | cfis[12];
706		if (!len)
707			len = 65536;
708	} else {
709		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
710			(cfis[5] << 8) | cfis[4];
711		len = cfis[12];
712		if (!len)
713			len = 256;
714	}
715	lba *= blockif_sectsz(p->bctx);
716	len *= blockif_sectsz(p->bctx);
717
718	/* Pull request off free list */
719	aior = STAILQ_FIRST(&p->iofhd);
720	assert(aior != NULL);
721	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
722
723	aior->cfis = cfis;
724	aior->slot = slot;
725	aior->len = len;
726	aior->done = done;
727	breq = &aior->io_req;
728	breq->br_offset = lba + done;
729	ahci_build_iov(p, aior, prdt, hdr->prdtl);
730
731	/* Mark this command in-flight. */
732	p->pending |= 1 << slot;
733
734	/* Stuff request onto busy list. */
735	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
736
737	if (ncq && first)
738		ahci_write_fis_d2h_ncq(p, slot);
739
740	if (readop)
741		err = blockif_read(p->bctx, breq);
742	else
743		err = blockif_write(p->bctx, breq);
744	assert(err == 0);
745}
746
747static void
748ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
749{
750	struct ahci_ioreq *aior;
751	struct blockif_req *breq;
752	int err;
753
754	/*
755	 * Pull request off free list
756	 */
757	aior = STAILQ_FIRST(&p->iofhd);
758	assert(aior != NULL);
759	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
760	aior->cfis = cfis;
761	aior->slot = slot;
762	aior->len = 0;
763	aior->done = 0;
764	aior->more = 0;
765	breq = &aior->io_req;
766
767	/*
768	 * Mark this command in-flight.
769	 */
770	p->pending |= 1 << slot;
771
772	/*
773	 * Stuff request onto busy list
774	 */
775	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
776
777	err = blockif_flush(p->bctx, breq);
778	assert(err == 0);
779}
780
781static inline void
782read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
783		void *buf, int size)
784{
785	struct ahci_cmd_hdr *hdr;
786	struct ahci_prdt_entry *prdt;
787	void *to;
788	int i, len;
789
790	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
791	len = size;
792	to = buf;
793	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
794	for (i = 0; i < hdr->prdtl && len; i++) {
795		uint8_t *ptr;
796		uint32_t dbcsz;
797		int sublen;
798
799		dbcsz = (prdt->dbc & DBCMASK) + 1;
800		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
801		sublen = MIN(len, dbcsz);
802		memcpy(to, ptr, sublen);
803		len -= sublen;
804		to += sublen;
805		prdt++;
806	}
807}
808
809static void
810ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
811{
812	struct ahci_ioreq *aior;
813	struct blockif_req *breq;
814	uint8_t *entry;
815	uint64_t elba;
816	uint32_t len, elen;
817	int err, first, ncq;
818	uint8_t buf[512];
819
820	first = (done == 0);
821	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
822		len = (uint16_t)cfis[13] << 8 | cfis[12];
823		len *= 512;
824		ncq = 0;
825	} else { /* ATA_SEND_FPDMA_QUEUED */
826		len = (uint16_t)cfis[11] << 8 | cfis[3];
827		len *= 512;
828		ncq = 1;
829	}
830	read_prdt(p, slot, cfis, buf, sizeof(buf));
831
832next:
833	entry = &buf[done];
834	elba = ((uint64_t)entry[5] << 40) |
835		((uint64_t)entry[4] << 32) |
836		((uint64_t)entry[3] << 24) |
837		((uint64_t)entry[2] << 16) |
838		((uint64_t)entry[1] << 8) |
839		entry[0];
840	elen = (uint16_t)entry[7] << 8 | entry[6];
841	done += 8;
842	if (elen == 0) {
843		if (done >= len) {
844			if (ncq) {
845				if (first)
846					ahci_write_fis_d2h_ncq(p, slot);
847				ahci_write_fis_sdb(p, slot, cfis,
848				    ATA_S_READY | ATA_S_DSC);
849			} else {
850				ahci_write_fis_d2h(p, slot, cfis,
851				    ATA_S_READY | ATA_S_DSC);
852			}
853			p->pending &= ~(1 << slot);
854			ahci_check_stopped(p);
855			if (!first)
856				ahci_handle_port(p);
857			return;
858		}
859		goto next;
860	}
861
862	/*
863	 * Pull request off free list
864	 */
865	aior = STAILQ_FIRST(&p->iofhd);
866	assert(aior != NULL);
867	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
868	aior->cfis = cfis;
869	aior->slot = slot;
870	aior->len = len;
871	aior->done = done;
872	aior->more = (len != done);
873
874	breq = &aior->io_req;
875	breq->br_offset = elba * blockif_sectsz(p->bctx);
876	breq->br_resid = elen * blockif_sectsz(p->bctx);
877
878	/*
879	 * Mark this command in-flight.
880	 */
881	p->pending |= 1 << slot;
882
883	/*
884	 * Stuff request onto busy list
885	 */
886	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
887
888	if (ncq && first)
889		ahci_write_fis_d2h_ncq(p, slot);
890
891	err = blockif_delete(p->bctx, breq);
892	assert(err == 0);
893}
894
895static inline void
896write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
897		void *buf, int size)
898{
899	struct ahci_cmd_hdr *hdr;
900	struct ahci_prdt_entry *prdt;
901	void *from;
902	int i, len;
903
904	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
905	len = size;
906	from = buf;
907	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
908	for (i = 0; i < hdr->prdtl && len; i++) {
909		uint8_t *ptr;
910		uint32_t dbcsz;
911		int sublen;
912
913		dbcsz = (prdt->dbc & DBCMASK) + 1;
914		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
915		sublen = MIN(len, dbcsz);
916		memcpy(ptr, from, sublen);
917		len -= sublen;
918		from += sublen;
919		prdt++;
920	}
921	hdr->prdbc = size - len;
922}
923
/*
 * Store a checksum in the last byte of 'buf' such that the 8-bit sum of all
 * 'size' bytes is zero.  'size' must be at least 1.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t total = 0;
	int idx = 0;

	while (idx < size - 1)
		total += buf[idx++];
	buf[size - 1] = (uint8_t)(0x100 - total);
}
934
935static void
936ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
937{
938	struct ahci_cmd_hdr *hdr;
939	uint32_t buf[128];
940	uint8_t *buf8 = (uint8_t *)buf;
941	uint16_t *buf16 = (uint16_t *)buf;
942
943	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
944	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
945	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
946		ahci_write_fis_d2h(p, slot, cfis,
947		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
948		return;
949	}
950
951	memset(buf, 0, sizeof(buf));
952	if (cfis[4] == 0x00) {	/* Log directory */
953		buf16[0x00] = 1; /* Version -- 1 */
954		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
955		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
956	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
957		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
958		ahci_checksum(buf8, sizeof(buf));
959	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
960		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
961			buf[0x00] = 1;	/* SFQ DSM supported */
962			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
963		}
964	} else {
965		ahci_write_fis_d2h(p, slot, cfis,
966		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
967		return;
968	}
969
970	if (cfis[2] == ATA_READ_LOG_EXT)
971		ahci_write_fis_piosetup(p);
972	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
973	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
974}
975
976static void
977handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
978{
979	struct ahci_cmd_hdr *hdr;
980
981	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
982	if (p->atapi || hdr->prdtl == 0) {
983		ahci_write_fis_d2h(p, slot, cfis,
984		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
985	} else {
986		ahci_write_fis_piosetup(p);
987		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
988		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
989	}
990}
991
992static void
993ata_identify_init(struct ahci_port* p, int atapi)
994{
995	struct ata_params* ata_ident = &p->ata_ident;
996
997	if (atapi) {
998		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
999		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1000		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1001			ATA_SUPPORT_DMA;
1002		ata_ident->capabilities2 = (1 << 14 | 1);
1003		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1004		ata_ident->obsolete62 = 0x3f;
1005		ata_ident->mwdmamodes = 7;
1006		if (p->xfermode & ATA_WDMA0)
1007			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1008		ata_ident->apiomodes = 3;
1009		ata_ident->mwdmamin = 0x0078;
1010		ata_ident->mwdmarec = 0x0078;
1011		ata_ident->pioblind = 0x0078;
1012		ata_ident->pioiordy = 0x0078;
1013		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1014		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1015		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1016		ata_ident->version_major = 0x3f0;
1017		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1018			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1019		ata_ident->support.command2 = (1 << 14);
1020		ata_ident->support.extension = (1 << 14);
1021		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1022			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1023		ata_ident->enabled.extension = (1 << 14);
1024		ata_ident->udmamodes = 0x7f;
1025		if (p->xfermode & ATA_UDMA0)
1026			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1027		ata_ident->transport_major = 0x1020;
1028		ata_ident->integrity = 0x00a5;
1029	} else {
1030		uint64_t sectors;
1031		int sectsz, psectsz, psectoff, candelete, ro;
1032		uint16_t cyl;
1033		uint8_t sech, heads;
1034
1035		ro = blockif_is_ro(p->bctx);
1036		candelete = blockif_candelete(p->bctx);
1037		sectsz = blockif_sectsz(p->bctx);
1038		sectors = blockif_size(p->bctx) / sectsz;
1039		blockif_chs(p->bctx, &cyl, &heads, &sech);
1040		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1041		ata_ident->config = ATA_DRQ_FAST;
1042		ata_ident->cylinders = cyl;
1043		ata_ident->heads = heads;
1044		ata_ident->sectors = sech;
1045
1046		ata_ident->sectors_intr = (0x8000 | 128);
1047		ata_ident->tcg = 0;
1048
1049		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1050			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1051		ata_ident->capabilities2 = (1 << 14);
1052		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1053		if (p->mult_sectors)
1054			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1055		if (sectors <= 0x0fffffff) {
1056			ata_ident->lba_size_1 = sectors;
1057			ata_ident->lba_size_2 = (sectors >> 16);
1058		} else {
1059			ata_ident->lba_size_1 = 0xffff;
1060			ata_ident->lba_size_2 = 0x0fff;
1061		}
1062		ata_ident->mwdmamodes = 0x7;
1063		if (p->xfermode & ATA_WDMA0)
1064			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1065		ata_ident->apiomodes = 0x3;
1066		ata_ident->mwdmamin = 0x0078;
1067		ata_ident->mwdmarec = 0x0078;
1068		ata_ident->pioblind = 0x0078;
1069		ata_ident->pioiordy = 0x0078;
1070		ata_ident->support3 = 0;
1071		ata_ident->queue = 31;
1072		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1073			ATA_SUPPORT_NCQ);
1074		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1075			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1076		ata_ident->version_major = 0x3f0;
1077		ata_ident->version_minor = 0x28;
1078		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1079			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1080		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1081			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1082		ata_ident->support.extension = (1 << 14);
1083		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1084			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1085		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1086			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1087		ata_ident->enabled.extension = (1 << 14);
1088		ata_ident->udmamodes = 0x7f;
1089		if (p->xfermode & ATA_UDMA0)
1090			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1091		ata_ident->lba_size48_1 = sectors;
1092		ata_ident->lba_size48_2 = (sectors >> 16);
1093		ata_ident->lba_size48_3 = (sectors >> 32);
1094		ata_ident->lba_size48_4 = (sectors >> 48);
1095
1096		if (candelete && !ro) {
1097			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1098			ata_ident->max_dsm_blocks = 1;
1099			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1100		}
1101		ata_ident->pss = ATA_PSS_VALID_VALUE;
1102		ata_ident->lsalign = 0x4000;
1103		if (psectsz > sectsz) {
1104			ata_ident->pss |= ATA_PSS_MULTLS;
1105			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1106			ata_ident->lsalign |= (psectoff / sectsz);
1107		}
1108		if (sectsz > 512) {
1109			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1110			ata_ident->lss_1 = sectsz / 2;
1111			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1112		}
1113		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1114		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1115		ata_ident->transport_major = 0x1020;
1116		ata_ident->integrity = 0x00a5;
1117	}
1118	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1119}
1120
1121static void
1122handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1123{
1124	if (!p->atapi) {
1125		ahci_write_fis_d2h(p, slot, cfis,
1126		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1127	} else {
1128		ahci_write_fis_piosetup(p);
1129		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1130		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1131	}
1132}
1133
1134static void
1135atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1136{
1137	uint8_t buf[36];
1138	uint8_t *acmd;
1139	int len;
1140	uint32_t tfd;
1141
1142	acmd = cfis + 0x40;
1143
1144	if (acmd[1] & 1) {		/* VPD */
1145		if (acmd[2] == 0) {	/* Supported VPD pages */
1146			buf[0] = 0x05;
1147			buf[1] = 0;
1148			buf[2] = 0;
1149			buf[3] = 1;
1150			buf[4] = 0;
1151			len = 4 + buf[3];
1152		} else {
1153			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1154			p->asc = 0x24;
1155			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1156			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1157			ahci_write_fis_d2h(p, slot, cfis, tfd);
1158			return;
1159		}
1160	} else {
1161		buf[0] = 0x05;
1162		buf[1] = 0x80;
1163		buf[2] = 0x00;
1164		buf[3] = 0x21;
1165		buf[4] = 31;
1166		buf[5] = 0;
1167		buf[6] = 0;
1168		buf[7] = 0;
1169		atapi_string(buf + 8, "BHYVE", 8);
1170		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1171		atapi_string(buf + 32, "001", 4);
1172		len = sizeof(buf);
1173	}
1174
1175	if (len > acmd[4])
1176		len = acmd[4];
1177	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1178	write_prdt(p, slot, cfis, buf, len);
1179	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1180}
1181
1182static void
1183atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1184{
1185	uint8_t buf[8];
1186	uint64_t sectors;
1187
1188	sectors = blockif_size(p->bctx) / 2048;
1189	be32enc(buf, sectors - 1);
1190	be32enc(buf + 4, 2048);
1191	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1192	write_prdt(p, slot, cfis, buf, sizeof(buf));
1193	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1194}
1195
/*
 * Answer a READ TOC packet.
 *
 * The 16-bit big-endian value at CDB bytes 7-8 bounds the number of bytes
 * returned; the top two bits of CDB byte 9 select the response format:
 *   0 - descriptor for track 1 (unless skipped) plus a 0xaa descriptor
 *   1 - fixed 12-byte reply
 *   2 - four 11-byte descriptors (points 0xa0, 0xa1, 0xa2 and 1)
 * Any other format, or an unsupported starting track in format 0, fails
 * with ILLEGAL REQUEST / asc 0x24.
 *
 * Bit 1 of CDB byte 1 selects MSF addresses (via lba_to_msf()) instead of
 * 32-bit big-endian LBAs.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	/* The packet (CDB) lives at offset 0x40 of the command table. */
	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		int msf, size;
		uint64_t sectors;
		/* buf[20]: 2-byte length + 2 header bytes + two 8-byte descriptors */
		uint8_t start_track, buf[20], *bp;

		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		/*
		 * Only starting tracks 0, 1 and 0xaa are accepted
		 * (0xaa is presumably the lead-out track number -- confirm
		 * against the MMC specification).
		 */
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* Skip the 2-byte length field; it is filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		/* Descriptor for track 1, starting at address 0. */
		if (start_track <= 1) {
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* Descriptor 0xaa, carrying the end-of-media address. */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/*
		 * NOTE(review): dividing by 4 looks like a 512 -> 2048 byte
		 * sector conversion, which would assume blockif_sectsz() is
		 * 512 -- confirm.
		 */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		/* Length field excludes its own two bytes. */
		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		/* Fixed reply: length 0xa, bytes 2 and 3 set to 1, rest zero. */
		uint8_t buf[12];

		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		int msf, size;
		uint64_t sectors;
		/* buf[50]: 4 header bytes + four 11-byte descriptors = 48 used */
		uint8_t *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		/* Skip the 2-byte length field; it is filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* Descriptor for point 0xa0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Descriptor for point 0xa1. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Descriptor for point 0xa2, carrying the end-of-media address. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/* NOTE(review): same divide-by-4 assumption as in format 0. */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* Descriptor for point 1, starting at address 0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		/* Length field excludes its own two bytes. */
		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		/* Format 3 is not implemented. */
		uint32_t tfd;

		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1375
1376static void
1377atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1378{
1379	uint8_t buf[16];
1380
1381	memset(buf, 0, sizeof(buf));
1382	buf[3] = 8;
1383
1384	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1385	write_prdt(p, slot, cfis, buf, sizeof(buf));
1386	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1387}
1388
1389static void
1390atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1391{
1392	struct ahci_ioreq *aior;
1393	struct ahci_cmd_hdr *hdr;
1394	struct ahci_prdt_entry *prdt;
1395	struct blockif_req *breq;
1396	uint8_t *acmd;
1397	uint64_t lba;
1398	uint32_t len;
1399	int err;
1400
1401	acmd = cfis + 0x40;
1402	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1403	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1404
1405	lba = be32dec(acmd + 2);
1406	if (acmd[0] == READ_10)
1407		len = be16dec(acmd + 7);
1408	else
1409		len = be32dec(acmd + 6);
1410	if (len == 0) {
1411		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1412		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1413	}
1414	lba *= 2048;
1415	len *= 2048;
1416
1417	/*
1418	 * Pull request off free list
1419	 */
1420	aior = STAILQ_FIRST(&p->iofhd);
1421	assert(aior != NULL);
1422	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1423	aior->cfis = cfis;
1424	aior->slot = slot;
1425	aior->len = len;
1426	aior->done = done;
1427	breq = &aior->io_req;
1428	breq->br_offset = lba + done;
1429	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1430
1431	/* Mark this command in-flight. */
1432	p->pending |= 1 << slot;
1433
1434	/* Stuff request onto busy list. */
1435	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1436
1437	err = blockif_read(p->bctx, breq);
1438	assert(err == 0);
1439}
1440
1441static void
1442atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1443{
1444	uint8_t buf[64];
1445	uint8_t *acmd;
1446	int len;
1447
1448	acmd = cfis + 0x40;
1449	len = acmd[4];
1450	if (len > sizeof(buf))
1451		len = sizeof(buf);
1452	memset(buf, 0, len);
1453	buf[0] = 0x70 | (1 << 7);
1454	buf[2] = p->sense_key;
1455	buf[7] = 10;
1456	buf[12] = p->asc;
1457	write_prdt(p, slot, cfis, buf, len);
1458	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1459	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1460}
1461
1462static void
1463atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1464{
1465	uint8_t *acmd = cfis + 0x40;
1466	uint32_t tfd;
1467
1468	switch (acmd[4] & 3) {
1469	case 0:
1470	case 1:
1471	case 3:
1472		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1473		tfd = ATA_S_READY | ATA_S_DSC;
1474		break;
1475	case 2:
1476		/* TODO eject media */
1477		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1478		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1479		p->asc = 0x53;
1480		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1481		break;
1482	}
1483	ahci_write_fis_d2h(p, slot, cfis, tfd);
1484}
1485
1486static void
1487atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1488{
1489	uint8_t *acmd;
1490	uint32_t tfd;
1491	uint8_t pc, code;
1492	int len;
1493
1494	acmd = cfis + 0x40;
1495	len = be16dec(acmd + 7);
1496	pc = acmd[2] >> 6;
1497	code = acmd[2] & 0x3f;
1498
1499	switch (pc) {
1500	case 0:
1501		switch (code) {
1502		case MODEPAGE_RW_ERROR_RECOVERY:
1503		{
1504			uint8_t buf[16];
1505
1506			if (len > sizeof(buf))
1507				len = sizeof(buf);
1508
1509			memset(buf, 0, sizeof(buf));
1510			be16enc(buf, 16 - 2);
1511			buf[2] = 0x70;
1512			buf[8] = 0x01;
1513			buf[9] = 16 - 10;
1514			buf[11] = 0x05;
1515			write_prdt(p, slot, cfis, buf, len);
1516			tfd = ATA_S_READY | ATA_S_DSC;
1517			break;
1518		}
1519		case MODEPAGE_CD_CAPABILITIES:
1520		{
1521			uint8_t buf[30];
1522
1523			if (len > sizeof(buf))
1524				len = sizeof(buf);
1525
1526			memset(buf, 0, sizeof(buf));
1527			be16enc(buf, 30 - 2);
1528			buf[2] = 0x70;
1529			buf[8] = 0x2A;
1530			buf[9] = 30 - 10;
1531			buf[10] = 0x08;
1532			buf[12] = 0x71;
1533			be16enc(&buf[18], 2);
1534			be16enc(&buf[20], 512);
1535			write_prdt(p, slot, cfis, buf, len);
1536			tfd = ATA_S_READY | ATA_S_DSC;
1537			break;
1538		}
1539		default:
1540			goto error;
1541			break;
1542		}
1543		break;
1544	case 3:
1545		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1546		p->asc = 0x39;
1547		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1548		break;
1549error:
1550	case 1:
1551	case 2:
1552		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1553		p->asc = 0x24;
1554		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1555		break;
1556	}
1557	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1558	ahci_write_fis_d2h(p, slot, cfis, tfd);
1559}
1560
1561static void
1562atapi_get_event_status_notification(struct ahci_port *p, int slot,
1563    uint8_t *cfis)
1564{
1565	uint8_t *acmd;
1566	uint32_t tfd;
1567
1568	acmd = cfis + 0x40;
1569
1570	/* we don't support asynchronous operation */
1571	if (!(acmd[1] & 1)) {
1572		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1573		p->asc = 0x24;
1574		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1575	} else {
1576		uint8_t buf[8];
1577		int len;
1578
1579		len = be16dec(acmd + 7);
1580		if (len > sizeof(buf))
1581			len = sizeof(buf);
1582
1583		memset(buf, 0, sizeof(buf));
1584		be16enc(buf, 8 - 2);
1585		buf[2] = 0x04;
1586		buf[3] = 0x10;
1587		buf[5] = 0x02;
1588		write_prdt(p, slot, cfis, buf, len);
1589		tfd = ATA_S_READY | ATA_S_DSC;
1590	}
1591	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1592	ahci_write_fis_d2h(p, slot, cfis, tfd);
1593}
1594
1595static void
1596handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1597{
1598	uint8_t *acmd;
1599
1600	acmd = cfis + 0x40;
1601
1602#ifdef AHCI_DEBUG
1603	{
1604		int i;
1605		DPRINTF("ACMD:");
1606		for (i = 0; i < 16; i++)
1607			DPRINTF("%02x ", acmd[i]);
1608		DPRINTF("\n");
1609	}
1610#endif
1611
1612	switch (acmd[0]) {
1613	case TEST_UNIT_READY:
1614		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1615		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1616		break;
1617	case INQUIRY:
1618		atapi_inquiry(p, slot, cfis);
1619		break;
1620	case READ_CAPACITY:
1621		atapi_read_capacity(p, slot, cfis);
1622		break;
1623	case PREVENT_ALLOW:
1624		/* TODO */
1625		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1626		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1627		break;
1628	case READ_TOC:
1629		atapi_read_toc(p, slot, cfis);
1630		break;
1631	case REPORT_LUNS:
1632		atapi_report_luns(p, slot, cfis);
1633		break;
1634	case READ_10:
1635	case READ_12:
1636		atapi_read(p, slot, cfis, 0);
1637		break;
1638	case REQUEST_SENSE:
1639		atapi_request_sense(p, slot, cfis);
1640		break;
1641	case START_STOP_UNIT:
1642		atapi_start_stop_unit(p, slot, cfis);
1643		break;
1644	case MODE_SENSE_10:
1645		atapi_mode_sense(p, slot, cfis);
1646		break;
1647	case GET_EVENT_STATUS_NOTIFICATION:
1648		atapi_get_event_status_notification(p, slot, cfis);
1649		break;
1650	default:
1651		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1652		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1653		p->asc = 0x20;
1654		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1655				ATA_S_READY | ATA_S_ERROR);
1656		break;
1657	}
1658}
1659
/*
 * Execute the ATA command held in a command FIS.
 *
 * p    - port the command was issued on
 * slot - command slot index, passed through to the handlers
 * cfis - command FIS; byte 2 is the command opcode that is dispatched on
 *
 * The port is marked busy first.  Commands handled inline overwrite
 * p->tfd or pass a status to ahci_write_fis_d2h(); the rest are handed to
 * dedicated handlers.  Unrecognised opcodes are aborted.
 */
static void
ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
{

	p->tfd |= ATA_S_BUSY;
	switch (cfis[2]) {
	case ATA_ATA_IDENTIFY:
		handle_identify(p, slot, cfis);
		break;
	case ATA_SETFEATURES:
	{
		/* cfis[3] selects the feature; cfis[12] is its argument. */
		switch (cfis[3]) {
		case ATA_SF_ENAB_SATA_SF:
			switch (cfis[12]) {
			case ATA_SATA_SF_AN:
				p->tfd = ATA_S_DSC | ATA_S_READY;
				break;
			default:
				p->tfd = ATA_S_ERROR | ATA_S_READY;
				p->tfd |= (ATA_ERROR_ABORT << 8);
				break;
			}
			break;
		case ATA_SF_ENAB_WCACHE:
		case ATA_SF_DIS_WCACHE:
		case ATA_SF_ENAB_RCACHE:
		case ATA_SF_DIS_RCACHE:
			/* Cache toggles are acknowledged without any action. */
			p->tfd = ATA_S_DSC | ATA_S_READY;
			break;
		case ATA_SF_SETXFER:
		{
			/*
			 * Upper five bits of cfis[12] give the transfer class;
			 * the low three bits are stored as the mode for the
			 * WDMA/UDMA classes only.  All values report success.
			 */
			switch (cfis[12] & 0xf8) {
			case ATA_PIO:
			case ATA_PIO0:
				break;
			case ATA_WDMA0:
			case ATA_UDMA0:
				p->xfermode = (cfis[12] & 0x7);
				break;
			}
			p->tfd = ATA_S_DSC | ATA_S_READY;
			break;
		}
		default:
			p->tfd = ATA_S_ERROR | ATA_S_READY;
			p->tfd |= (ATA_ERROR_ABORT << 8);
			break;
		}
		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
		break;
	}
	case ATA_SET_MULTI:
		/* Accept 0, or a power of two that is no greater than 128. */
		if (cfis[12] != 0 &&
			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
			p->tfd = ATA_S_ERROR | ATA_S_READY;
			p->tfd |= (ATA_ERROR_ABORT << 8);
		} else {
			p->mult_sectors = cfis[12];
			p->tfd = ATA_S_DSC | ATA_S_READY;
		}
		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
		break;
	case ATA_READ:
	case ATA_WRITE:
	case ATA_READ48:
	case ATA_WRITE48:
	case ATA_READ_MUL:
	case ATA_WRITE_MUL:
	case ATA_READ_MUL48:
	case ATA_WRITE_MUL48:
	case ATA_READ_DMA:
	case ATA_WRITE_DMA:
	case ATA_READ_DMA48:
	case ATA_WRITE_DMA48:
	case ATA_READ_FPDMA_QUEUED:
	case ATA_WRITE_FPDMA_QUEUED:
		/* All read/write variants share one handler; 0 = nothing done yet. */
		ahci_handle_rw(p, slot, cfis, 0);
		break;
	case ATA_FLUSHCACHE:
	case ATA_FLUSHCACHE48:
		ahci_handle_flush(p, slot, cfis);
		break;
	case ATA_DATA_SET_MANAGEMENT:
		/*
		 * Only the TRIM feature with cfis[11] == 0, cfis[13] == 0 and
		 * cfis[12] == 1 is accepted; anything else is aborted.
		 */
		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
		    cfis[13] == 0 && cfis[12] == 1) {
			ahci_handle_dsm_trim(p, slot, cfis, 0);
			break;
		}
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	case ATA_SEND_FPDMA_QUEUED:
		/*
		 * Queued form of the same TRIM request; the fields checked
		 * live at different FIS offsets than in the non-queued case.
		 */
		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
		    cfis[11] == 0 && cfis[3] == 1) {
			ahci_handle_dsm_trim(p, slot, cfis, 0);
			break;
		}
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	case ATA_READ_LOG_EXT:
	case ATA_READ_LOG_DMA_EXT:
		ahci_handle_read_log(p, slot, cfis);
		break;
	case ATA_SECURITY_FREEZE_LOCK:
	case ATA_SMART_CMD:
	case ATA_NOP:
		/* Recognised but always aborted. */
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	case ATA_CHECK_POWER_MODE:
		cfis[12] = 0xff;	/* always on */
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	case ATA_STANDBY_CMD:
	case ATA_STANDBY_IMMEDIATE:
	case ATA_IDLE_CMD:
	case ATA_IDLE_IMMEDIATE:
	case ATA_SLEEP:
	case ATA_READ_VERIFY:
	case ATA_READ_VERIFY48:
		/* Reported as successful without doing any work. */
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	case ATA_ATAPI_IDENTIFY:
		handle_atapi_identify(p, slot, cfis);
		break;
	case ATA_PACKET_CMD:
		/* Packet commands are only valid on ATAPI ports. */
		if (!p->atapi) {
			ahci_write_fis_d2h(p, slot, cfis,
			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		} else
			handle_packet_cmd(p, slot, cfis);
		break;
	default:
		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	}
}
1801
/*
 * Process the command sitting in one slot of the port's command list.
 *
 * The slot's command header gives the guest address of the command
 * table (ctba) and the number of PRD entries (prdtl); the table is mapped
 * with paddr_guest2host() and its first bytes are treated as the command
 * FIS.  A FIS with bit 7 of byte 1 set is executed as a command;
 * otherwise it is treated as a control update that drives the software
 * reset sequence, and the slot's CI bit is cleared here.
 */
static void
ahci_handle_slot(struct ahci_port *p, int slot)
{
	struct ahci_cmd_hdr *hdr;
#ifdef AHCI_DEBUG
	struct ahci_prdt_entry *prdt;
#endif
	struct pci_ahci_softc *sc;
	uint8_t *cfis;
#ifdef AHCI_DEBUG
	int cfl, i;
#endif

	sc = p->pr_sc;
	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
#ifdef AHCI_DEBUG
	/* Low five bits of flags are the FIS length in dwords. */
	cfl = (hdr->flags & 0x1f) * 4;
#endif
	/* Map 0x80 bytes of FIS/packet area plus the PRD table that follows. */
	/* NOTE(review): the mapping result is used without a NULL check -- confirm paddr_guest2host() cannot fail here. */
	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
#ifdef AHCI_DEBUG
	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);

	DPRINTF("\ncfis:");
	for (i = 0; i < cfl; i++) {
		if (i % 10 == 0)
			DPRINTF("\n");
		DPRINTF("%02x ", cfis[i]);
	}
	DPRINTF("\n");

	for (i = 0; i < hdr->prdtl; i++) {
		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
		prdt++;
	}
#endif

	/*
	 * NOTE(review): this early return leaves the slot's CI bit set and
	 * the port neither busy nor pending -- confirm the caller's scan
	 * loop cannot keep re-visiting the same slot.
	 */
	if (cfis[0] != FIS_TYPE_REGH2D) {
		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
		return;
	}

	if (cfis[1] & 0x80) {
		ahci_handle_cmd(p, slot, cfis);
	} else {
		/*
		 * Control FIS: bit 2 of byte 15 set arms the reset; the next
		 * control FIS with the bit clear performs it.
		 */
		if (cfis[15] & (1 << 2))
			p->reset = 1;
		else if (p->reset) {
			p->reset = 0;
			ahci_port_reset(p);
		}
		p->ci &= ~(1 << slot);
	}
}
1856
1857static void
1858ahci_handle_port(struct ahci_port *p)
1859{
1860
1861	if (!(p->cmd & AHCI_P_CMD_ST))
1862		return;
1863
1864	/*
1865	 * Search for any new commands to issue ignoring those that
1866	 * are already in-flight.  Stop if device is busy or in error.
1867	 */
1868	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1869		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1870			break;
1871		if (p->waitforclear)
1872			break;
1873		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1874			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1875			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1876			ahci_handle_slot(p, p->ccs);
1877		}
1878	}
1879}
1880
1881/*
1882 * blockif callback routine - this runs in the context of the blockif
1883 * i/o thread, so the mutex needs to be acquired.
1884 */
1885static void
1886ata_ioreq_cb(struct blockif_req *br, int err)
1887{
1888	struct ahci_cmd_hdr *hdr;
1889	struct ahci_ioreq *aior;
1890	struct ahci_port *p;
1891	struct pci_ahci_softc *sc;
1892	uint32_t tfd;
1893	uint8_t *cfis;
1894	int slot, ncq, dsm;
1895
1896	DPRINTF("%s %d\n", __func__, err);
1897
1898	ncq = dsm = 0;
1899	aior = br->br_param;
1900	p = aior->io_pr;
1901	cfis = aior->cfis;
1902	slot = aior->slot;
1903	sc = p->pr_sc;
1904	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1905
1906	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1907	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1908	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1909		ncq = 1;
1910	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1911	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1912	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1913		dsm = 1;
1914
1915	pthread_mutex_lock(&sc->mtx);
1916
1917	/*
1918	 * Delete the blockif request from the busy list
1919	 */
1920	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1921
1922	/*
1923	 * Move the blockif request back to the free list
1924	 */
1925	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1926
1927	if (!err)
1928		hdr->prdbc = aior->done;
1929
1930	if (!err && aior->more) {
1931		if (dsm)
1932			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1933		else
1934			ahci_handle_rw(p, slot, cfis, aior->done);
1935		goto out;
1936	}
1937
1938	if (!err)
1939		tfd = ATA_S_READY | ATA_S_DSC;
1940	else
1941		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1942	if (ncq)
1943		ahci_write_fis_sdb(p, slot, cfis, tfd);
1944	else
1945		ahci_write_fis_d2h(p, slot, cfis, tfd);
1946
1947	/*
1948	 * This command is now complete.
1949	 */
1950	p->pending &= ~(1 << slot);
1951
1952	ahci_check_stopped(p);
1953	ahci_handle_port(p);
1954out:
1955	pthread_mutex_unlock(&sc->mtx);
1956	DPRINTF("%s exit\n", __func__);
1957}
1958
1959static void
1960atapi_ioreq_cb(struct blockif_req *br, int err)
1961{
1962	struct ahci_cmd_hdr *hdr;
1963	struct ahci_ioreq *aior;
1964	struct ahci_port *p;
1965	struct pci_ahci_softc *sc;
1966	uint8_t *cfis;
1967	uint32_t tfd;
1968	int slot;
1969
1970	DPRINTF("%s %d\n", __func__, err);
1971
1972	aior = br->br_param;
1973	p = aior->io_pr;
1974	cfis = aior->cfis;
1975	slot = aior->slot;
1976	sc = p->pr_sc;
1977	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1978
1979	pthread_mutex_lock(&sc->mtx);
1980
1981	/*
1982	 * Delete the blockif request from the busy list
1983	 */
1984	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1985
1986	/*
1987	 * Move the blockif request back to the free list
1988	 */
1989	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1990
1991	if (!err)
1992		hdr->prdbc = aior->done;
1993
1994	if (!err && aior->more) {
1995		atapi_read(p, slot, cfis, aior->done);
1996		goto out;
1997	}
1998
1999	if (!err) {
2000		tfd = ATA_S_READY | ATA_S_DSC;
2001	} else {
2002		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2003		p->asc = 0x21;
2004		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2005	}
2006	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2007	ahci_write_fis_d2h(p, slot, cfis, tfd);
2008
2009	/*
2010	 * This command is now complete.
2011	 */
2012	p->pending &= ~(1 << slot);
2013
2014	ahci_check_stopped(p);
2015	ahci_handle_port(p);
2016out:
2017	pthread_mutex_unlock(&sc->mtx);
2018	DPRINTF("%s exit\n", __func__);
2019}
2020
2021static void
2022pci_ahci_ioreq_init(struct ahci_port *pr)
2023{
2024	struct ahci_ioreq *vr;
2025	int i;
2026
2027	pr->ioqsz = blockif_queuesz(pr->bctx);
2028	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2029	STAILQ_INIT(&pr->iofhd);
2030
2031	/*
2032	 * Add all i/o request entries to the free queue
2033	 */
2034	for (i = 0; i < pr->ioqsz; i++) {
2035		vr = &pr->ioreq[i];
2036		vr->io_pr = pr;
2037		if (!pr->atapi)
2038			vr->io_req.br_callback = ata_ioreq_cb;
2039		else
2040			vr->io_req.br_callback = atapi_ioreq_cb;
2041		vr->io_req.br_param = vr;
2042		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2043	}
2044
2045	TAILQ_INIT(&pr->iobhd);
2046}
2047
/*
 * Handle a guest write to a per-port register.
 *
 * sc     - controller state
 * offset - register offset within the BAR; split below into a port
 *          index and an offset inside that port's register block
 * value  - value written by the guest
 *
 * The caller is expected to have range-checked offset against the number
 * of ports (pci_ahci_write() does).
 */
static void
pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
{
	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
	struct ahci_port *p = &sc->port[port];

	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
		port, offset, value);

	switch (offset) {
	case AHCI_P_CLB:
		p->clb = value;
		break;
	case AHCI_P_CLBU:
		p->clbu = value;
		break;
	case AHCI_P_FB:
		p->fb = value;
		break;
	case AHCI_P_FBU:
		p->fbu = value;
		break;
	case AHCI_P_IS:
		/* Bits written as 1 are cleared. */
		p->is &= ~value;
		ahci_port_intr(p);
		break;
	case AHCI_P_IE:
		/* Only the bits in this mask can be set by the guest. */
		p->ie = value & 0xFDC000FF;
		ahci_port_intr(p);
		break;
	case AHCI_P_CMD:
	{
		/* Replace the guest-writable bits with those from value. */
		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;

		if (!(value & AHCI_P_CMD_ST)) {
			ahci_port_stop(p);
		} else {
			uint64_t clb;

			/* Starting: flag CR and map the command list. */
			p->cmd |= AHCI_P_CMD_CR;
			clb = (uint64_t)p->clbu << 32 | p->clb;
			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
		}

		/* FR mirrors FRE; the received-FIS area is mapped on enable. */
		if (value & AHCI_P_CMD_FRE) {
			uint64_t fb;

			p->cmd |= AHCI_P_CMD_FR;
			fb = (uint64_t)p->fbu << 32 | p->fb;
			/* we don't support FBSCP, so rfis size is 256Bytes */
			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
		} else {
			p->cmd &= ~AHCI_P_CMD_FR;
		}

		/* CLO clears BUSY and DRQ in the task file, then clears itself. */
		if (value & AHCI_P_CMD_CLO) {
			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
			p->cmd &= ~AHCI_P_CMD_CLO;
		}

		/* A non-zero ICC request is cleared again immediately. */
		if (value & AHCI_P_CMD_ICC_MASK) {
			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
		}

		/* Issue any commands that can run in the new state. */
		ahci_handle_port(p);
		break;
	}
	case AHCI_P_TFD:
	case AHCI_P_SIG:
	case AHCI_P_SSTS:
		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
		break;
	case AHCI_P_SCTL:
		p->sctl = value;
		/* A reset request is only acted on while the port is stopped. */
		if (!(p->cmd & AHCI_P_CMD_ST)) {
			if (value & ATA_SC_DET_RESET)
				ahci_port_reset(p);
		}
		break;
	case AHCI_P_SERR:
		/* Bits written as 1 are cleared. */
		p->serr &= ~value;
		break;
	case AHCI_P_SACT:
		/* Writes can only set bits here. */
		p->sact |= value;
		break;
	case AHCI_P_CI:
		/* Writes can only set bits; newly issued slots are handled now. */
		p->ci |= value;
		ahci_handle_port(p);
		break;
	case AHCI_P_SNTF:
	case AHCI_P_FBS:
	default:
		/* Writes to these registers are ignored. */
		break;
	}
}
2152
2153static void
2154pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2155{
2156	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2157		offset, value);
2158
2159	switch (offset) {
2160	case AHCI_CAP:
2161	case AHCI_PI:
2162	case AHCI_VS:
2163	case AHCI_CAP2:
2164		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2165		break;
2166	case AHCI_GHC:
2167		if (value & AHCI_GHC_HR) {
2168			ahci_reset(sc);
2169			break;
2170		}
2171		if (value & AHCI_GHC_IE)
2172			sc->ghc |= AHCI_GHC_IE;
2173		else
2174			sc->ghc &= ~AHCI_GHC_IE;
2175		ahci_generate_intr(sc, 0xffffffff);
2176		break;
2177	case AHCI_IS:
2178		sc->is &= ~value;
2179		ahci_generate_intr(sc, value);
2180		break;
2181	default:
2182		break;
2183	}
2184}
2185
2186static void
2187pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2188		int baridx, uint64_t offset, int size, uint64_t value)
2189{
2190	struct pci_ahci_softc *sc = pi->pi_arg;
2191
2192	assert(baridx == 5);
2193	assert((offset % 4) == 0 && size == 4);
2194
2195	pthread_mutex_lock(&sc->mtx);
2196
2197	if (offset < AHCI_OFFSET)
2198		pci_ahci_host_write(sc, offset, value);
2199	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2200		pci_ahci_port_write(sc, offset, value);
2201	else
2202		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2203
2204	pthread_mutex_unlock(&sc->mtx);
2205}
2206
2207static uint64_t
2208pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2209{
2210	uint32_t value;
2211
2212	switch (offset) {
2213	case AHCI_CAP:
2214	case AHCI_GHC:
2215	case AHCI_IS:
2216	case AHCI_PI:
2217	case AHCI_VS:
2218	case AHCI_CCCC:
2219	case AHCI_CCCP:
2220	case AHCI_EM_LOC:
2221	case AHCI_EM_CTL:
2222	case AHCI_CAP2:
2223	{
2224		uint32_t *p = &sc->cap;
2225		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2226		value = *p;
2227		break;
2228	}
2229	default:
2230		value = 0;
2231		break;
2232	}
2233	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2234		offset, value);
2235
2236	return (value);
2237}
2238
2239static uint64_t
2240pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2241{
2242	uint32_t value;
2243	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2244	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2245
2246	switch (offset) {
2247	case AHCI_P_CLB:
2248	case AHCI_P_CLBU:
2249	case AHCI_P_FB:
2250	case AHCI_P_FBU:
2251	case AHCI_P_IS:
2252	case AHCI_P_IE:
2253	case AHCI_P_CMD:
2254	case AHCI_P_TFD:
2255	case AHCI_P_SIG:
2256	case AHCI_P_SSTS:
2257	case AHCI_P_SCTL:
2258	case AHCI_P_SERR:
2259	case AHCI_P_SACT:
2260	case AHCI_P_CI:
2261	case AHCI_P_SNTF:
2262	case AHCI_P_FBS:
2263	{
2264		uint32_t *p= &sc->port[port].clb;
2265		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2266		value = *p;
2267		break;
2268	}
2269	default:
2270		value = 0;
2271		break;
2272	}
2273
2274	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2275		port, offset, value);
2276
2277	return value;
2278}
2279
2280static uint64_t
2281pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2282    uint64_t regoff, int size)
2283{
2284	struct pci_ahci_softc *sc = pi->pi_arg;
2285	uint64_t offset;
2286	uint32_t value;
2287
2288	assert(baridx == 5);
2289	assert(size == 1 || size == 2 || size == 4);
2290	assert((regoff & (size - 1)) == 0);
2291
2292	pthread_mutex_lock(&sc->mtx);
2293
2294	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2295	if (offset < AHCI_OFFSET)
2296		value = pci_ahci_host_read(sc, offset);
2297	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2298		value = pci_ahci_port_read(sc, offset);
2299	else {
2300		value = 0;
2301		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
2302		    regoff);
2303	}
2304	value >>= 8 * (regoff & 0x3);
2305
2306	pthread_mutex_unlock(&sc->mtx);
2307
2308	return (value);
2309}
2310
2311static int
2312pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2313{
2314	char bident[sizeof("XX:XX:XX")];
2315	struct blockif_ctxt *bctxt;
2316	struct pci_ahci_softc *sc;
2317	int ret, slots, p;
2318	MD5_CTX mdctx;
2319	u_char digest[16];
2320	char *next, *next2;
2321	char *bopt, *uopt, *xopts, *config;
2322	FILE* fp;
2323	size_t block_len;
2324	int comma, optpos;
2325
2326	ret = 0;
2327
2328#ifdef AHCI_DEBUG
2329	dbg = fopen("/tmp/log", "w+");
2330#endif
2331
2332	sc = calloc(1, sizeof(struct pci_ahci_softc));
2333	pi->pi_arg = sc;
2334	sc->asc_pi = pi;
2335	pthread_mutex_init(&sc->mtx, NULL);
2336	sc->ports = 0;
2337	sc->pi = 0;
2338	slots = 32;
2339
2340	for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) {
2341		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2342		memset(ata_ident, 0, sizeof(struct ata_params));
2343
2344		/* Identify and cut off type of present port. */
2345		if (strncmp(opts, "hd:", 3) == 0) {
2346			atapi = 0;
2347			opts += 3;
2348		} else if (strncmp(opts, "cd:", 3) == 0) {
2349			atapi = 1;
2350			opts += 3;
2351		}
2352
2353		/* Find and cut off the next port options. */
2354		next = strstr(opts, ",hd:");
2355		next2 = strstr(opts, ",cd:");
2356		if (next == NULL || (next2 != NULL && next2 < next))
2357			next = next2;
2358		if (next != NULL) {
2359			next[0] = 0;
2360			next++;
2361		}
2362
2363		if (opts[0] == 0)
2364			continue;
2365
2366		uopt = strdup(opts);
2367		bopt = NULL;
2368		fp = open_memstream(&bopt, &block_len);
2369		comma = 0;
2370		optpos = 0;
2371
2372		for (xopts = strtok(uopt, ",");
2373		     xopts != NULL;
2374		     xopts = strtok(NULL, ",")) {
2375
2376			/* First option assume as block filename. */
2377			if (optpos == 0) {
2378				/*
2379				 * Create an identifier for the backing file.
2380				 * Use parts of the md5 sum of the filename
2381				 */
2382				char ident[AHCI_PORT_IDENT];
2383				MD5Init(&mdctx);
2384				MD5Update(&mdctx, opts, strlen(opts));
2385				MD5Final(digest, &mdctx);
2386				snprintf(ident, AHCI_PORT_IDENT,
2387					"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2388					digest[0], digest[1], digest[2], digest[3], digest[4],
2389					digest[5]);
2390				ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2391				ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2392				if (atapi) {
2393					ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2394				}
2395				else {
2396					ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2397				}
2398			}
2399
2400			if ((config = strchr(xopts, '=')) != NULL) {
2401				*config++ = '\0';
2402				if (!strcmp("nmrr", xopts)) {
2403					ata_ident->media_rotation_rate = atoi(config);
2404				}
2405				else if (!strcmp("ser", xopts)) {
2406					ata_string((uint8_t*)(&ata_ident->serial), config, 20);
2407				}
2408				else if (!strcmp("rev", xopts)) {
2409					ata_string((uint8_t*)(&ata_ident->revision), config, 8);
2410				}
2411				else if (!strcmp("model", xopts)) {
2412					ata_string((uint8_t*)(&ata_ident->model), config, 40);
2413				}
2414				else {
2415					/* Pass all other options to blockif_open. */
2416					*--config = '=';
2417					fprintf(fp, "%s%s", comma ? "," : "", xopts);
2418					comma = 1;
2419				}
2420			}
2421			else {
2422				/* Pass all other options to blockif_open. */
2423				fprintf(fp, "%s%s", comma ? "," : "", xopts);
2424				comma = 1;
2425			}
2426			optpos++;
2427		}
2428		free(uopt);
2429		fclose(fp);
2430
2431		DPRINTF("%s\n", bopt);
2432
2433		/*
2434		 * Attempt to open the backing image. Use the PCI slot/func
2435		 * and the port number for the identifier string.
2436		 */
2437		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
2438		    pi->pi_func, p);
2439		bctxt = blockif_open(bopt, bident);
2440		free(bopt);
2441
2442		if (bctxt == NULL) {
2443			sc->ports = p;
2444			ret = 1;
2445			goto open_fail;
2446		}
2447		sc->port[p].bctx = bctxt;
2448		sc->port[p].pr_sc = sc;
2449		sc->port[p].port = p;
2450		sc->port[p].atapi = atapi;
2451
2452		ata_identify_init(&sc->port[p], atapi);
2453
2454		/*
2455		 * Allocate blockif request structures and add them
2456		 * to the free list
2457		 */
2458		pci_ahci_ioreq_init(&sc->port[p]);
2459
2460		sc->pi |= (1 << p);
2461		if (sc->port[p].ioqsz < slots)
2462			slots = sc->port[p].ioqsz;
2463	}
2464	sc->ports = p;
2465
2466	/* Intel ICH8 AHCI */
2467	--slots;
2468	if (sc->ports < DEF_PORTS)
2469		sc->ports = DEF_PORTS;
2470	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2471	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2472	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2473	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2474	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2475
2476	sc->vs = 0x10300;
2477	sc->cap2 = AHCI_CAP2_APST;
2478	ahci_reset(sc);
2479
2480	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2481	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2482	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2483	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2484	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2485	p = MIN(sc->ports, 16);
2486	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2487	pci_emul_add_msicap(pi, 1 << p);
2488	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2489	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2490
2491	pci_lintr_request(pi);
2492
2493open_fail:
2494	if (ret) {
2495		for (p = 0; p < sc->ports; p++) {
2496			if (sc->port[p].bctx != NULL)
2497				blockif_close(sc->port[p].bctx);
2498		}
2499		free(sc);
2500	}
2501
2502	return (ret);
2503}
2504
/*
 * Init hook for the hard-disk flavoured AHCI emulations.
 *
 * Hands ctx, pi and opts straight to pci_ahci_init() with the trailing
 * flag argument cleared (0), i.e. the non-ATAPI case.
 *
 * Returns the status produced by pci_ahci_init() unchanged.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;	/* disk device, not ATAPI */

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2511
/*
 * Init hook for the ATAPI (CD/DVD) flavoured AHCI emulation.
 *
 * Hands ctx, pi and opts straight to pci_ahci_init() with the trailing
 * flag argument set (1), i.e. the ATAPI case.
 *
 * Returns the status produced by pci_ahci_init() unchanged.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;	/* ATAPI device */

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2518
2519/*
2520 * Use separate emulation names to distinguish drive and atapi devices
2521 */
2522struct pci_devemu pci_de_ahci = {
2523	.pe_emu =	"ahci",
2524	.pe_init =	pci_ahci_hd_init,
2525	.pe_barwrite =	pci_ahci_write,
2526	.pe_barread =	pci_ahci_read
2527};
2528PCI_EMUL_SET(pci_de_ahci);
2529
2530struct pci_devemu pci_de_ahci_hd = {
2531	.pe_emu =	"ahci-hd",
2532	.pe_init =	pci_ahci_hd_init,
2533	.pe_barwrite =	pci_ahci_write,
2534	.pe_barread =	pci_ahci_read
2535};
2536PCI_EMUL_SET(pci_de_ahci_hd);
2537
2538struct pci_devemu pci_de_ahci_cd = {
2539	.pe_emu =	"ahci-cd",
2540	.pe_init =	pci_ahci_atapi_init,
2541	.pe_barwrite =	pci_ahci_write,
2542	.pe_barread =	pci_ahci_read
2543};
2544PCI_EMUL_SET(pci_de_ahci_cd);
2545