1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5 * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD$
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include <sys/param.h>
36#include <sys/linker_set.h>
37#include <sys/stat.h>
38#include <sys/uio.h>
39#include <sys/ioctl.h>
40#include <sys/disk.h>
41#include <sys/ata.h>
42#include <sys/endian.h>
43
44#include <machine/vmm_snapshot.h>
45
46#include <errno.h>
47#include <fcntl.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <stdint.h>
51#include <string.h>
52#include <strings.h>
53#include <unistd.h>
54#include <assert.h>
55#include <pthread.h>
56#include <pthread_np.h>
57#include <inttypes.h>
58#include <md5.h>
59
60#include "bhyverun.h"
61#include "pci_emul.h"
62#include "ahci.h"
63#include "block_if.h"
64
/*
 * Port-count limits and the values placed in a port's signature
 * register by ahci_port_reset().  MAX_PORTS also sizes the port[]
 * array in struct pci_ahci_softc.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
70
/*
 * SATA FIS type codes.  The FIS builders in this file store the code in
 * byte 0 of the FIS; ahci_write_fis() only accepts REGD2H, SETDEVBITS
 * and PIOSETUP and rejects every other value.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
81
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably compared against the first byte of the ATAPI
 * packet found at cfis + 0x40; the dispatcher is outside this view.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 *
 * NOTE(review): presumably consumed by the MODE_SENSE_10 handler, which
 * is not part of this view.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): the ATA_SF_ / ATA_SATA_SF_ prefixes suggest these are
 * SET FEATURES subcommand and SATA feature codes rather than command
 * opcodes -- confirm against the code that uses them.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
112
/*
 * Debug printf
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it after every
 * message when AHCI_DEBUG is defined; otherwise it expands to nothing,
 * so its arguments are not evaluated.  WPRINTF() always prints to
 * stdout.  Both use the GNU named-variadic macro syntax ("arg...").
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
123
/*
 * Size of a port identification buffer: 20 characters plus one extra
 * byte.  Parenthesized so the macro expands safely inside larger
 * expressions (e.g. AHCI_PORT_IDENT * 2).
 */
#define AHCI_PORT_IDENT (20 + 1)
125
/*
 * One outstanding backend request.  Descriptors live either on the
 * owning port's free list (iofhd) or on its busy list (iobhd).
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to the blockif_*() calls */
	struct ahci_port *io_pr;	/* presumably the owning port; set outside this view */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* linkage on the free list */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* linkage on the busy list */
	uint8_t *cfis;			/* command FIS this request belongs to */
	uint32_t len;			/* total number of bytes the command covers */
	uint32_t done;			/* bytes already issued/handled */
	int slot;			/* command slot number */
	int more;			/* nonzero when another chunk must follow */
	int readop;			/* nonzero for reads, zero for writes */
};
138
/*
 * Per-port emulation state: backend handle, device identity, the
 * guest-visible port register values and the pool of I/O request
 * descriptors.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	struct ata_params ata_ident;	/* IDENTIFY data, see ata_identify_init() */
	uint8_t *cmd_lst;		/* command list, indexed slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area, see ahci_write_fis() */
	int port;			/* port number; bit index into the global IS */
	int atapi;			/* nonzero for an ATAPI device */
	int reset;
	int waitforclear;		/* set when a FIS carrying ATA_S_ERROR is posted */
	int mult_sectors;		/* reported in the IDENTIFY 'multi' word */
	uint8_t xfermode;		/* current transfer mode (ATA_UDMA6 after reset) */
	uint8_t err_cfis[20];		/* error snapshot returned by the NCQ error log */
	uint8_t sense_key;		/* ATAPI sense key of the last failure */
	uint8_t asc;			/* ATAPI additional sense code */
	u_int ccs;			/* current command slot */
	uint32_t pending;		/* bitmask of slots with a request in flight */

	/*
	 * Port register values.  'is' and 'ie' are PxIS/PxIE (see
	 * ahci_generate_intr()); the remaining names follow the same
	 * pattern.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the backing array; allocated outside this view */
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free descriptors */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy descriptors */
};
183
/*
 * Command header as read from the port's command list at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries that follow the CFIS */
	uint32_t prdbc;		/* byte count transferred, set by write_prdt() */
	uint64_t ctba;
	uint32_t reserved[4];
};
191
/*
 * One PRDT entry; the table is read from cfis + 0x80.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
198
/*
 * Controller-wide state: global register values plus the array of ports.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance used for interrupts */
	pthread_mutex_t	mtx;		/* must be held when calling ahci_port_stop() */
	int ports;			/* number of ports in use (<= MAX_PORTS) */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* global IS: one bit per port */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* nonzero while the legacy interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)

/* Defined later in the file; called from ahci_handle_dsm_trim(). */
static void ahci_handle_port(struct ahci_port *p);
220
/*
 * Convert a logical block address into minute/second/frame form.
 * The address is biased by 150 frames and split at 75 frames per second
 * and 60 seconds per minute; the result is stored as buf[0] = minute,
 * buf[1] = second, buf[2] = frame.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	const int frames = lba + 150;
	const int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
228
229/*
230 * Generate HBA interrupts on global IS register write.
231 */
232static void
233ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
234{
235	struct pci_devinst *pi = sc->asc_pi;
236	struct ahci_port *p;
237	int i, nmsg;
238	uint32_t mmask;
239
240	/* Update global IS from PxIS/PxIE. */
241	for (i = 0; i < sc->ports; i++) {
242		p = &sc->port[i];
243		if (p->is & p->ie)
244			sc->is |= (1 << i);
245	}
246	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
247
248	/* If there is nothing enabled -- clear legacy interrupt and exit. */
249	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
250		if (sc->lintr) {
251			pci_lintr_deassert(pi);
252			sc->lintr = 0;
253		}
254		return;
255	}
256
257	/* If there is anything and no MSI -- assert legacy interrupt. */
258	nmsg = pci_msi_maxmsgnum(pi);
259	if (nmsg == 0) {
260		if (!sc->lintr) {
261			sc->lintr = 1;
262			pci_lintr_assert(pi);
263		}
264		return;
265	}
266
267	/* Assert respective MSIs for ports that were touched. */
268	for (i = 0; i < nmsg; i++) {
269		if (sc->ports <= nmsg || i < nmsg - 1)
270			mmask = 1 << i;
271		else
272			mmask = 0xffffffff << i;
273		if (sc->is & mask && mmask & mask)
274			pci_generate_msi(pi, i);
275	}
276}
277
278/*
279 * Generate HBA interrupt on specific port event.
280 */
281static void
282ahci_port_intr(struct ahci_port *p)
283{
284	struct pci_ahci_softc *sc = p->pr_sc;
285	struct pci_devinst *pi = sc->asc_pi;
286	int nmsg;
287
288	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
289	    p->port, p->is, p->ie, sc->is);
290
291	/* If there is nothing enabled -- we are done. */
292	if ((p->is & p->ie) == 0)
293		return;
294
295	/* In case of non-shared MSI always generate interrupt. */
296	nmsg = pci_msi_maxmsgnum(pi);
297	if (sc->ports <= nmsg || p->port < nmsg - 1) {
298		sc->is |= (1 << p->port);
299		if ((sc->ghc & AHCI_GHC_IE) == 0)
300			return;
301		pci_generate_msi(pi, p->port);
302		return;
303	}
304
305	/* If IS for this port is already set -- do nothing. */
306	if (sc->is & (1 << p->port))
307		return;
308
309	sc->is |= (1 << p->port);
310
311	/* If interrupts are enabled -- generate one. */
312	if ((sc->ghc & AHCI_GHC_IE) == 0)
313		return;
314	if (nmsg > 0) {
315		pci_generate_msi(pi, nmsg - 1);
316	} else if (!sc->lintr) {
317		sc->lintr = 1;
318		pci_lintr_assert(pi);
319	}
320}
321
322static void
323ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
324{
325	int offset, len, irq;
326
327	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
328		return;
329
330	switch (ft) {
331	case FIS_TYPE_REGD2H:
332		offset = 0x40;
333		len = 20;
334		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
335		break;
336	case FIS_TYPE_SETDEVBITS:
337		offset = 0x58;
338		len = 8;
339		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
340		break;
341	case FIS_TYPE_PIOSETUP:
342		offset = 0x20;
343		len = 20;
344		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
345		break;
346	default:
347		WPRINTF("unsupported fis type %d", ft);
348		return;
349	}
350	if (fis[2] & ATA_S_ERROR) {
351		p->waitforclear = 1;
352		irq |= AHCI_P_IX_TFE;
353	}
354	memcpy(p->rfis + offset, fis, len);
355	if (irq) {
356		if (~p->is & irq) {
357			p->is |= irq;
358			ahci_port_intr(p);
359		}
360	}
361}
362
363static void
364ahci_write_fis_piosetup(struct ahci_port *p)
365{
366	uint8_t fis[20];
367
368	memset(fis, 0, sizeof(fis));
369	fis[0] = FIS_TYPE_PIOSETUP;
370	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
371}
372
373static void
374ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
375{
376	uint8_t fis[8];
377	uint8_t error;
378
379	error = (tfd >> 8) & 0xff;
380	tfd &= 0x77;
381	memset(fis, 0, sizeof(fis));
382	fis[0] = FIS_TYPE_SETDEVBITS;
383	fis[1] = (1 << 6);
384	fis[2] = tfd;
385	fis[3] = error;
386	if (fis[2] & ATA_S_ERROR) {
387		p->err_cfis[0] = slot;
388		p->err_cfis[2] = tfd;
389		p->err_cfis[3] = error;
390		memcpy(&p->err_cfis[4], cfis + 4, 16);
391	} else {
392		*(uint32_t *)(fis + 4) = (1 << slot);
393		p->sact &= ~(1 << slot);
394	}
395	p->tfd &= ~0x77;
396	p->tfd |= tfd;
397	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
398}
399
400static void
401ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
402{
403	uint8_t fis[20];
404	uint8_t error;
405
406	error = (tfd >> 8) & 0xff;
407	memset(fis, 0, sizeof(fis));
408	fis[0] = FIS_TYPE_REGD2H;
409	fis[1] = (1 << 6);
410	fis[2] = tfd & 0xff;
411	fis[3] = error;
412	fis[4] = cfis[4];
413	fis[5] = cfis[5];
414	fis[6] = cfis[6];
415	fis[7] = cfis[7];
416	fis[8] = cfis[8];
417	fis[9] = cfis[9];
418	fis[10] = cfis[10];
419	fis[11] = cfis[11];
420	fis[12] = cfis[12];
421	fis[13] = cfis[13];
422	if (fis[2] & ATA_S_ERROR) {
423		p->err_cfis[0] = 0x80;
424		p->err_cfis[2] = tfd & 0xff;
425		p->err_cfis[3] = error;
426		memcpy(&p->err_cfis[4], cfis + 4, 16);
427	} else
428		p->ci &= ~(1 << slot);
429	p->tfd = tfd;
430	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
431}
432
433static void
434ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
435{
436	uint8_t fis[20];
437
438	p->tfd = ATA_S_READY | ATA_S_DSC;
439	memset(fis, 0, sizeof(fis));
440	fis[0] = FIS_TYPE_REGD2H;
441	fis[1] = 0;			/* No interrupt */
442	fis[2] = p->tfd;		/* Status */
443	fis[3] = 0;			/* No error */
444	p->ci &= ~(1 << slot);
445	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
446}
447
448static void
449ahci_write_reset_fis_d2h(struct ahci_port *p)
450{
451	uint8_t fis[20];
452
453	memset(fis, 0, sizeof(fis));
454	fis[0] = FIS_TYPE_REGD2H;
455	fis[3] = 1;
456	fis[4] = 1;
457	if (p->atapi) {
458		fis[5] = 0x14;
459		fis[6] = 0xeb;
460	}
461	fis[12] = 1;
462	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
463}
464
465static void
466ahci_check_stopped(struct ahci_port *p)
467{
468	/*
469	 * If we are no longer processing the command list and nothing
470	 * is in-flight, clear the running bit, the current command
471	 * slot, the command issue and active bits.
472	 */
473	if (!(p->cmd & AHCI_P_CMD_ST)) {
474		if (p->pending == 0) {
475			p->ccs = 0;
476			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
477			p->ci = 0;
478			p->sact = 0;
479			p->waitforclear = 0;
480		}
481	}
482}
483
484static void
485ahci_port_stop(struct ahci_port *p)
486{
487	struct ahci_ioreq *aior;
488	uint8_t *cfis;
489	int slot;
490	int error;
491
492	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
493
494	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
495		/*
496		 * Try to cancel the outstanding blockif request.
497		 */
498		error = blockif_cancel(p->bctx, &aior->io_req);
499		if (error != 0)
500			continue;
501
502		slot = aior->slot;
503		cfis = aior->cfis;
504		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
505		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
506		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
507			p->sact &= ~(1 << slot);	/* NCQ */
508		else
509			p->ci &= ~(1 << slot);
510
511		/*
512		 * This command is now done.
513		 */
514		p->pending &= ~(1 << slot);
515
516		/*
517		 * Delete the blockif request from the busy list
518		 */
519		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
520
521		/*
522		 * Move the blockif request back to the free list
523		 */
524		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
525	}
526
527	ahci_check_stopped(p);
528}
529
530static void
531ahci_port_reset(struct ahci_port *pr)
532{
533	pr->serr = 0;
534	pr->sact = 0;
535	pr->xfermode = ATA_UDMA6;
536	pr->mult_sectors = 128;
537
538	if (!pr->bctx) {
539		pr->ssts = ATA_SS_DET_NO_DEVICE;
540		pr->sig = 0xFFFFFFFF;
541		pr->tfd = 0x7F;
542		return;
543	}
544	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
545	if (pr->sctl & ATA_SC_SPD_MASK)
546		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
547	else
548		pr->ssts |= ATA_SS_SPD_GEN3;
549	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
550	if (!pr->atapi) {
551		pr->sig = PxSIG_ATA;
552		pr->tfd |= ATA_S_READY;
553	} else
554		pr->sig = PxSIG_ATAPI;
555	ahci_write_reset_fis_d2h(pr);
556}
557
558static void
559ahci_reset(struct pci_ahci_softc *sc)
560{
561	int i;
562
563	sc->ghc = AHCI_GHC_AE;
564	sc->is = 0;
565
566	if (sc->lintr) {
567		pci_lintr_deassert(sc->asc_pi);
568		sc->lintr = 0;
569	}
570
571	for (i = 0; i < sc->ports; i++) {
572		sc->port[i].ie = 0;
573		sc->port[i].is = 0;
574		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
575		if (sc->port[i].bctx)
576			sc->port[i].cmd |= AHCI_P_CMD_CPS;
577		sc->port[i].sctl = 0;
578		ahci_port_reset(&sc->port[i]);
579	}
580}
581
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string layout:
 * the two bytes of every 16-bit word are swapped (index ^ 1) and the
 * field is padded with spaces once 'src' is exhausted.  No terminating
 * NUL is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
594
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
 * padding with spaces once 'src' is exhausted.  No terminating NUL is
 * written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	int remaining = len;

	while (remaining-- > 0) {
		if (*src != '\0')
			*out++ = *src++;
		else
			*out++ = ' ';
	}
}
607
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * Fills aior->io_req.br_iov with host mappings of the guest buffers
 * described by 'prdt' (prdtl entries), starting 'aior->done' bytes into
 * the transfer and covering at most 'aior->len - aior->done' bytes or
 * BLOCKIF_IOV_MAX segments, whichever is reached first.  On return
 * br_iovcnt and br_resid describe this chunk, aior->done has been
 * advanced by the chunk size and aior->more tells whether another chunk
 * is required.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	int i, j, skip, todo, left, extra;
	uint32_t dbcsz;

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;		/* bytes to step over before mapping */
	left = aior->len - aior->done;	/* bytes still to be transferred */
	todo = 0;			/* bytes mapped into this chunk */
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		/* The low 22 bits hold the entry size minus one. */
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Entry is partly done: map only its tail. */
		dbcsz -= skip;
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole segments. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	/* More work only if bytes remain and PRDT entries were left over. */
	aior->more = (aior->done < aior->len && i < prdtl);
}
663
664static void
665ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
666{
667	struct ahci_ioreq *aior;
668	struct blockif_req *breq;
669	struct ahci_prdt_entry *prdt;
670	struct ahci_cmd_hdr *hdr;
671	uint64_t lba;
672	uint32_t len;
673	int err, first, ncq, readop;
674
675	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
676	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
677	ncq = 0;
678	readop = 1;
679	first = (done == 0);
680
681	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
682	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
683	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
684	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
685		readop = 0;
686
687	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
688	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
689		lba = ((uint64_t)cfis[10] << 40) |
690			((uint64_t)cfis[9] << 32) |
691			((uint64_t)cfis[8] << 24) |
692			((uint64_t)cfis[6] << 16) |
693			((uint64_t)cfis[5] << 8) |
694			cfis[4];
695		len = cfis[11] << 8 | cfis[3];
696		if (!len)
697			len = 65536;
698		ncq = 1;
699	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
700	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
701	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
702		lba = ((uint64_t)cfis[10] << 40) |
703			((uint64_t)cfis[9] << 32) |
704			((uint64_t)cfis[8] << 24) |
705			((uint64_t)cfis[6] << 16) |
706			((uint64_t)cfis[5] << 8) |
707			cfis[4];
708		len = cfis[13] << 8 | cfis[12];
709		if (!len)
710			len = 65536;
711	} else {
712		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
713			(cfis[5] << 8) | cfis[4];
714		len = cfis[12];
715		if (!len)
716			len = 256;
717	}
718	lba *= blockif_sectsz(p->bctx);
719	len *= blockif_sectsz(p->bctx);
720
721	/* Pull request off free list */
722	aior = STAILQ_FIRST(&p->iofhd);
723	assert(aior != NULL);
724	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
725
726	aior->cfis = cfis;
727	aior->slot = slot;
728	aior->len = len;
729	aior->done = done;
730	aior->readop = readop;
731	breq = &aior->io_req;
732	breq->br_offset = lba + done;
733	ahci_build_iov(p, aior, prdt, hdr->prdtl);
734
735	/* Mark this command in-flight. */
736	p->pending |= 1 << slot;
737
738	/* Stuff request onto busy list. */
739	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
740
741	if (ncq && first)
742		ahci_write_fis_d2h_ncq(p, slot);
743
744	if (readop)
745		err = blockif_read(p->bctx, breq);
746	else
747		err = blockif_write(p->bctx, breq);
748	assert(err == 0);
749}
750
751static void
752ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
753{
754	struct ahci_ioreq *aior;
755	struct blockif_req *breq;
756	int err;
757
758	/*
759	 * Pull request off free list
760	 */
761	aior = STAILQ_FIRST(&p->iofhd);
762	assert(aior != NULL);
763	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
764	aior->cfis = cfis;
765	aior->slot = slot;
766	aior->len = 0;
767	aior->done = 0;
768	aior->more = 0;
769	breq = &aior->io_req;
770
771	/*
772	 * Mark this command in-flight.
773	 */
774	p->pending |= 1 << slot;
775
776	/*
777	 * Stuff request onto busy list
778	 */
779	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
780
781	err = blockif_flush(p->bctx, breq);
782	assert(err == 0);
783}
784
785static inline void
786read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
787		void *buf, int size)
788{
789	struct ahci_cmd_hdr *hdr;
790	struct ahci_prdt_entry *prdt;
791	void *to;
792	int i, len;
793
794	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
795	len = size;
796	to = buf;
797	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
798	for (i = 0; i < hdr->prdtl && len; i++) {
799		uint8_t *ptr;
800		uint32_t dbcsz;
801		int sublen;
802
803		dbcsz = (prdt->dbc & DBCMASK) + 1;
804		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
805		sublen = MIN(len, dbcsz);
806		memcpy(to, ptr, sublen);
807		len -= sublen;
808		to += sublen;
809		prdt++;
810	}
811}
812
813static void
814ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
815{
816	struct ahci_ioreq *aior;
817	struct blockif_req *breq;
818	uint8_t *entry;
819	uint64_t elba;
820	uint32_t len, elen;
821	int err, first, ncq;
822	uint8_t buf[512];
823
824	first = (done == 0);
825	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
826		len = (uint16_t)cfis[13] << 8 | cfis[12];
827		len *= 512;
828		ncq = 0;
829	} else { /* ATA_SEND_FPDMA_QUEUED */
830		len = (uint16_t)cfis[11] << 8 | cfis[3];
831		len *= 512;
832		ncq = 1;
833	}
834	read_prdt(p, slot, cfis, buf, sizeof(buf));
835
836next:
837	entry = &buf[done];
838	elba = ((uint64_t)entry[5] << 40) |
839		((uint64_t)entry[4] << 32) |
840		((uint64_t)entry[3] << 24) |
841		((uint64_t)entry[2] << 16) |
842		((uint64_t)entry[1] << 8) |
843		entry[0];
844	elen = (uint16_t)entry[7] << 8 | entry[6];
845	done += 8;
846	if (elen == 0) {
847		if (done >= len) {
848			if (ncq) {
849				if (first)
850					ahci_write_fis_d2h_ncq(p, slot);
851				ahci_write_fis_sdb(p, slot, cfis,
852				    ATA_S_READY | ATA_S_DSC);
853			} else {
854				ahci_write_fis_d2h(p, slot, cfis,
855				    ATA_S_READY | ATA_S_DSC);
856			}
857			p->pending &= ~(1 << slot);
858			ahci_check_stopped(p);
859			if (!first)
860				ahci_handle_port(p);
861			return;
862		}
863		goto next;
864	}
865
866	/*
867	 * Pull request off free list
868	 */
869	aior = STAILQ_FIRST(&p->iofhd);
870	assert(aior != NULL);
871	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
872	aior->cfis = cfis;
873	aior->slot = slot;
874	aior->len = len;
875	aior->done = done;
876	aior->more = (len != done);
877
878	breq = &aior->io_req;
879	breq->br_offset = elba * blockif_sectsz(p->bctx);
880	breq->br_resid = elen * blockif_sectsz(p->bctx);
881
882	/*
883	 * Mark this command in-flight.
884	 */
885	p->pending |= 1 << slot;
886
887	/*
888	 * Stuff request onto busy list
889	 */
890	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
891
892	if (ncq && first)
893		ahci_write_fis_d2h_ncq(p, slot);
894
895	err = blockif_delete(p->bctx, breq);
896	assert(err == 0);
897}
898
899static inline void
900write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
901		void *buf, int size)
902{
903	struct ahci_cmd_hdr *hdr;
904	struct ahci_prdt_entry *prdt;
905	void *from;
906	int i, len;
907
908	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
909	len = size;
910	from = buf;
911	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
912	for (i = 0; i < hdr->prdtl && len; i++) {
913		uint8_t *ptr;
914		uint32_t dbcsz;
915		int sublen;
916
917		dbcsz = (prdt->dbc & DBCMASK) + 1;
918		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
919		sublen = MIN(len, dbcsz);
920		memcpy(ptr, from, sublen);
921		len -= sublen;
922		from += sublen;
923		prdt++;
924	}
925	hdr->prdbc = size - len;
926}
927
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of
 * all 'size' bytes is zero.  The previous value of the last byte is
 * ignored.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const int last = size - 1;
	uint8_t acc = 0;
	int idx;

	for (idx = last; idx > 0; idx--)
		acc += buf[idx - 1];
	buf[last] = 0x100 - acc;
}
938
939static void
940ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
941{
942	struct ahci_cmd_hdr *hdr;
943	uint32_t buf[128];
944	uint8_t *buf8 = (uint8_t *)buf;
945	uint16_t *buf16 = (uint16_t *)buf;
946
947	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
948	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
949	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
950		ahci_write_fis_d2h(p, slot, cfis,
951		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
952		return;
953	}
954
955	memset(buf, 0, sizeof(buf));
956	if (cfis[4] == 0x00) {	/* Log directory */
957		buf16[0x00] = 1; /* Version -- 1 */
958		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
959		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
960	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
961		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
962		ahci_checksum(buf8, sizeof(buf));
963	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
964		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
965			buf[0x00] = 1;	/* SFQ DSM supported */
966			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
967		}
968	} else {
969		ahci_write_fis_d2h(p, slot, cfis,
970		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
971		return;
972	}
973
974	if (cfis[2] == ATA_READ_LOG_EXT)
975		ahci_write_fis_piosetup(p);
976	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
977	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
978}
979
980static void
981handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
982{
983	struct ahci_cmd_hdr *hdr;
984
985	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
986	if (p->atapi || hdr->prdtl == 0) {
987		ahci_write_fis_d2h(p, slot, cfis,
988		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
989	} else {
990		ahci_write_fis_piosetup(p);
991		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
992		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
993	}
994}
995
/*
 * Fill in the port's IDENTIFY data (p->ata_ident).
 *
 * 'atapi' selects between the ATAPI CD-ROM description and the ATA disk
 * description.  The disk variant queries the backend for its size,
 * geometry, logical/physical sector sizes, read-only state and delete
 * (TRIM) capability.  The structure is finished with ahci_checksum(),
 * which stores a checksum in its last byte.
 */
static void
ata_identify_init(struct ahci_port* p, int atapi)
{
	struct ata_params* ata_ident = &p->ata_ident;

	if (atapi) {
		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
			ATA_SUPPORT_DMA;
		ata_ident->capabilities2 = (1 << 14 | 1);
		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
		ata_ident->obsolete62 = 0x3f;
		ata_ident->mwdmamodes = 7;
		/* Flag the currently selected multiword DMA mode, if any. */
		if (p->xfermode & ATA_WDMA0)
			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
		ata_ident->apiomodes = 3;
		ata_ident->mwdmamin = 0x0078;
		ata_ident->mwdmarec = 0x0078;
		ata_ident->pioblind = 0x0078;
		ata_ident->pioiordy = 0x0078;
		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
		/* Speed field taken from the port's current SStatus value. */
		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
		ata_ident->version_major = 0x3f0;
		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
		ata_ident->support.command2 = (1 << 14);
		ata_ident->support.extension = (1 << 14);
		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
		ata_ident->enabled.extension = (1 << 14);
		ata_ident->udmamodes = 0x7f;
		/* Flag the currently selected Ultra DMA mode, if any. */
		if (p->xfermode & ATA_UDMA0)
			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
		ata_ident->transport_major = 0x1020;
		ata_ident->integrity = 0x00a5;
	} else {
		uint64_t sectors;
		int sectsz, psectsz, psectoff, candelete, ro;
		uint16_t cyl;
		uint8_t sech, heads;

		/* Gather the backend properties the description depends on. */
		ro = blockif_is_ro(p->bctx);
		candelete = blockif_candelete(p->bctx);
		sectsz = blockif_sectsz(p->bctx);
		sectors = blockif_size(p->bctx) / sectsz;
		blockif_chs(p->bctx, &cyl, &heads, &sech);
		blockif_psectsz(p->bctx, &psectsz, &psectoff);
		ata_ident->config = ATA_DRQ_FAST;
		ata_ident->cylinders = cyl;
		ata_ident->heads = heads;
		ata_ident->sectors = sech;

		ata_ident->sectors_intr = (0x8000 | 128);
		ata_ident->tcg = 0;

		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
		ata_ident->capabilities2 = (1 << 14);
		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
		if (p->mult_sectors)
			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
		/* 28-bit capacity: exact when it fits, otherwise saturated. */
		if (sectors <= 0x0fffffff) {
			ata_ident->lba_size_1 = sectors;
			ata_ident->lba_size_2 = (sectors >> 16);
		} else {
			ata_ident->lba_size_1 = 0xffff;
			ata_ident->lba_size_2 = 0x0fff;
		}
		ata_ident->mwdmamodes = 0x7;
		/* Flag the currently selected multiword DMA mode, if any. */
		if (p->xfermode & ATA_WDMA0)
			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
		ata_ident->apiomodes = 0x3;
		ata_ident->mwdmamin = 0x0078;
		ata_ident->mwdmarec = 0x0078;
		ata_ident->pioblind = 0x0078;
		ata_ident->pioiordy = 0x0078;
		ata_ident->support3 = 0;
		ata_ident->queue = 31;
		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
			ATA_SUPPORT_NCQ);
		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
			(p->ssts & ATA_SS_SPD_MASK) >> 3);
		ata_ident->version_major = 0x3f0;
		ata_ident->version_minor = 0x28;
		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
		ata_ident->support.extension = (1 << 14);
		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
		ata_ident->enabled.extension = (1 << 14);
		ata_ident->udmamodes = 0x7f;
		/* Flag the currently selected Ultra DMA mode, if any. */
		if (p->xfermode & ATA_UDMA0)
			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
		/* 48-bit capacity, split into four 16-bit words. */
		ata_ident->lba_size48_1 = sectors;
		ata_ident->lba_size48_2 = (sectors >> 16);
		ata_ident->lba_size48_3 = (sectors >> 32);
		ata_ident->lba_size48_4 = (sectors >> 48);

		/* Advertise TRIM only on writable backends that can delete. */
		if (candelete && !ro) {
			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
			ata_ident->max_dsm_blocks = 1;
			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
		}
		ata_ident->pss = ATA_PSS_VALID_VALUE;
		ata_ident->lsalign = 0x4000;
		/* Physical sector larger than logical: report ratio and offset. */
		if (psectsz > sectsz) {
			ata_ident->pss |= ATA_PSS_MULTLS;
			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
			ata_ident->lsalign |= (psectoff / sectsz);
		}
		/* Logical sector above 512 bytes: report its size as sectsz / 2. */
		if (sectsz > 512) {
			ata_ident->pss |= ATA_PSS_LSSABOVE512;
			ata_ident->lss_1 = sectsz / 2;
			ata_ident->lss_2 = ((sectsz / 2) >> 16);
		}
		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
		ata_ident->transport_major = 0x1020;
		ata_ident->integrity = 0x00a5;
	}
	/* Overwrites the last byte of the structure with the checksum. */
	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
}
1124
1125static void
1126handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1127{
1128	if (!p->atapi) {
1129		ahci_write_fis_d2h(p, slot, cfis,
1130		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1131	} else {
1132		ahci_write_fis_piosetup(p);
1133		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1134		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1135	}
1136}
1137
1138static void
1139atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1140{
1141	uint8_t buf[36];
1142	uint8_t *acmd;
1143	int len;
1144	uint32_t tfd;
1145
1146	acmd = cfis + 0x40;
1147
1148	if (acmd[1] & 1) {		/* VPD */
1149		if (acmd[2] == 0) {	/* Supported VPD pages */
1150			buf[0] = 0x05;
1151			buf[1] = 0;
1152			buf[2] = 0;
1153			buf[3] = 1;
1154			buf[4] = 0;
1155			len = 4 + buf[3];
1156		} else {
1157			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1158			p->asc = 0x24;
1159			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1160			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1161			ahci_write_fis_d2h(p, slot, cfis, tfd);
1162			return;
1163		}
1164	} else {
1165		buf[0] = 0x05;
1166		buf[1] = 0x80;
1167		buf[2] = 0x00;
1168		buf[3] = 0x21;
1169		buf[4] = 31;
1170		buf[5] = 0;
1171		buf[6] = 0;
1172		buf[7] = 0;
1173		atapi_string(buf + 8, "BHYVE", 8);
1174		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1175		atapi_string(buf + 32, "001", 4);
1176		len = sizeof(buf);
1177	}
1178
1179	if (len > acmd[4])
1180		len = acmd[4];
1181	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1182	write_prdt(p, slot, cfis, buf, len);
1183	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1184}
1185
1186static void
1187atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1188{
1189	uint8_t buf[8];
1190	uint64_t sectors;
1191
1192	sectors = blockif_size(p->bctx) / 2048;
1193	be32enc(buf, sectors - 1);
1194	be32enc(buf + 4, 2048);
1195	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1196	write_prdt(p, slot, cfis, buf, sizeof(buf));
1197	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1198}
1199
1200static void
1201atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1202{
1203	uint8_t *acmd;
1204	uint8_t format;
1205	int len;
1206
1207	acmd = cfis + 0x40;
1208
1209	len = be16dec(acmd + 7);
1210	format = acmd[9] >> 6;
1211	switch (format) {
1212	case 0:
1213	{
1214		int msf, size;
1215		uint64_t sectors;
1216		uint8_t start_track, buf[20], *bp;
1217
1218		msf = (acmd[1] >> 1) & 1;
1219		start_track = acmd[6];
1220		if (start_track > 1 && start_track != 0xaa) {
1221			uint32_t tfd;
1222			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1223			p->asc = 0x24;
1224			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1225			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1226			ahci_write_fis_d2h(p, slot, cfis, tfd);
1227			return;
1228		}
1229		bp = buf + 2;
1230		*bp++ = 1;
1231		*bp++ = 1;
1232		if (start_track <= 1) {
1233			*bp++ = 0;
1234			*bp++ = 0x14;
1235			*bp++ = 1;
1236			*bp++ = 0;
1237			if (msf) {
1238				*bp++ = 0;
1239				lba_to_msf(bp, 0);
1240				bp += 3;
1241			} else {
1242				*bp++ = 0;
1243				*bp++ = 0;
1244				*bp++ = 0;
1245				*bp++ = 0;
1246			}
1247		}
1248		*bp++ = 0;
1249		*bp++ = 0x14;
1250		*bp++ = 0xaa;
1251		*bp++ = 0;
1252		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1253		sectors >>= 2;
1254		if (msf) {
1255			*bp++ = 0;
1256			lba_to_msf(bp, sectors);
1257			bp += 3;
1258		} else {
1259			be32enc(bp, sectors);
1260			bp += 4;
1261		}
1262		size = bp - buf;
1263		be16enc(buf, size - 2);
1264		if (len > size)
1265			len = size;
1266		write_prdt(p, slot, cfis, buf, len);
1267		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1268		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1269		break;
1270	}
1271	case 1:
1272	{
1273		uint8_t buf[12];
1274
1275		memset(buf, 0, sizeof(buf));
1276		buf[1] = 0xa;
1277		buf[2] = 0x1;
1278		buf[3] = 0x1;
1279		if (len > sizeof(buf))
1280			len = sizeof(buf);
1281		write_prdt(p, slot, cfis, buf, len);
1282		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1283		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1284		break;
1285	}
1286	case 2:
1287	{
1288		int msf, size;
1289		uint64_t sectors;
1290		uint8_t *bp, buf[50];
1291
1292		msf = (acmd[1] >> 1) & 1;
1293		bp = buf + 2;
1294		*bp++ = 1;
1295		*bp++ = 1;
1296
1297		*bp++ = 1;
1298		*bp++ = 0x14;
1299		*bp++ = 0;
1300		*bp++ = 0xa0;
1301		*bp++ = 0;
1302		*bp++ = 0;
1303		*bp++ = 0;
1304		*bp++ = 0;
1305		*bp++ = 1;
1306		*bp++ = 0;
1307		*bp++ = 0;
1308
1309		*bp++ = 1;
1310		*bp++ = 0x14;
1311		*bp++ = 0;
1312		*bp++ = 0xa1;
1313		*bp++ = 0;
1314		*bp++ = 0;
1315		*bp++ = 0;
1316		*bp++ = 0;
1317		*bp++ = 1;
1318		*bp++ = 0;
1319		*bp++ = 0;
1320
1321		*bp++ = 1;
1322		*bp++ = 0x14;
1323		*bp++ = 0;
1324		*bp++ = 0xa2;
1325		*bp++ = 0;
1326		*bp++ = 0;
1327		*bp++ = 0;
1328		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1329		sectors >>= 2;
1330		if (msf) {
1331			*bp++ = 0;
1332			lba_to_msf(bp, sectors);
1333			bp += 3;
1334		} else {
1335			be32enc(bp, sectors);
1336			bp += 4;
1337		}
1338
1339		*bp++ = 1;
1340		*bp++ = 0x14;
1341		*bp++ = 0;
1342		*bp++ = 1;
1343		*bp++ = 0;
1344		*bp++ = 0;
1345		*bp++ = 0;
1346		if (msf) {
1347			*bp++ = 0;
1348			lba_to_msf(bp, 0);
1349			bp += 3;
1350		} else {
1351			*bp++ = 0;
1352			*bp++ = 0;
1353			*bp++ = 0;
1354			*bp++ = 0;
1355		}
1356
1357		size = bp - buf;
1358		be16enc(buf, size - 2);
1359		if (len > size)
1360			len = size;
1361		write_prdt(p, slot, cfis, buf, len);
1362		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1363		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1364		break;
1365	}
1366	default:
1367	{
1368		uint32_t tfd;
1369
1370		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1371		p->asc = 0x24;
1372		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1373		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1374		ahci_write_fis_d2h(p, slot, cfis, tfd);
1375		break;
1376	}
1377	}
1378}
1379
1380static void
1381atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1382{
1383	uint8_t buf[16];
1384
1385	memset(buf, 0, sizeof(buf));
1386	buf[3] = 8;
1387
1388	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1389	write_prdt(p, slot, cfis, buf, sizeof(buf));
1390	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1391}
1392
1393static void
1394atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1395{
1396	struct ahci_ioreq *aior;
1397	struct ahci_cmd_hdr *hdr;
1398	struct ahci_prdt_entry *prdt;
1399	struct blockif_req *breq;
1400	uint8_t *acmd;
1401	uint64_t lba;
1402	uint32_t len;
1403	int err;
1404
1405	acmd = cfis + 0x40;
1406	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1407	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1408
1409	lba = be32dec(acmd + 2);
1410	if (acmd[0] == READ_10)
1411		len = be16dec(acmd + 7);
1412	else
1413		len = be32dec(acmd + 6);
1414	if (len == 0) {
1415		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1416		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1417	}
1418	lba *= 2048;
1419	len *= 2048;
1420
1421	/*
1422	 * Pull request off free list
1423	 */
1424	aior = STAILQ_FIRST(&p->iofhd);
1425	assert(aior != NULL);
1426	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1427	aior->cfis = cfis;
1428	aior->slot = slot;
1429	aior->len = len;
1430	aior->done = done;
1431	aior->readop = 1;
1432	breq = &aior->io_req;
1433	breq->br_offset = lba + done;
1434	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1435
1436	/* Mark this command in-flight. */
1437	p->pending |= 1 << slot;
1438
1439	/* Stuff request onto busy list. */
1440	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1441
1442	err = blockif_read(p->bctx, breq);
1443	assert(err == 0);
1444}
1445
1446static void
1447atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1448{
1449	uint8_t buf[64];
1450	uint8_t *acmd;
1451	int len;
1452
1453	acmd = cfis + 0x40;
1454	len = acmd[4];
1455	if (len > sizeof(buf))
1456		len = sizeof(buf);
1457	memset(buf, 0, len);
1458	buf[0] = 0x70 | (1 << 7);
1459	buf[2] = p->sense_key;
1460	buf[7] = 10;
1461	buf[12] = p->asc;
1462	write_prdt(p, slot, cfis, buf, len);
1463	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1464	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1465}
1466
1467static void
1468atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1469{
1470	uint8_t *acmd = cfis + 0x40;
1471	uint32_t tfd;
1472
1473	switch (acmd[4] & 3) {
1474	case 0:
1475	case 1:
1476	case 3:
1477		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1478		tfd = ATA_S_READY | ATA_S_DSC;
1479		break;
1480	case 2:
1481		/* TODO eject media */
1482		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1483		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1484		p->asc = 0x53;
1485		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1486		break;
1487	}
1488	ahci_write_fis_d2h(p, slot, cfis, tfd);
1489}
1490
1491static void
1492atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1493{
1494	uint8_t *acmd;
1495	uint32_t tfd;
1496	uint8_t pc, code;
1497	int len;
1498
1499	acmd = cfis + 0x40;
1500	len = be16dec(acmd + 7);
1501	pc = acmd[2] >> 6;
1502	code = acmd[2] & 0x3f;
1503
1504	switch (pc) {
1505	case 0:
1506		switch (code) {
1507		case MODEPAGE_RW_ERROR_RECOVERY:
1508		{
1509			uint8_t buf[16];
1510
1511			if (len > sizeof(buf))
1512				len = sizeof(buf);
1513
1514			memset(buf, 0, sizeof(buf));
1515			be16enc(buf, 16 - 2);
1516			buf[2] = 0x70;
1517			buf[8] = 0x01;
1518			buf[9] = 16 - 10;
1519			buf[11] = 0x05;
1520			write_prdt(p, slot, cfis, buf, len);
1521			tfd = ATA_S_READY | ATA_S_DSC;
1522			break;
1523		}
1524		case MODEPAGE_CD_CAPABILITIES:
1525		{
1526			uint8_t buf[30];
1527
1528			if (len > sizeof(buf))
1529				len = sizeof(buf);
1530
1531			memset(buf, 0, sizeof(buf));
1532			be16enc(buf, 30 - 2);
1533			buf[2] = 0x70;
1534			buf[8] = 0x2A;
1535			buf[9] = 30 - 10;
1536			buf[10] = 0x08;
1537			buf[12] = 0x71;
1538			be16enc(&buf[18], 2);
1539			be16enc(&buf[20], 512);
1540			write_prdt(p, slot, cfis, buf, len);
1541			tfd = ATA_S_READY | ATA_S_DSC;
1542			break;
1543		}
1544		default:
1545			goto error;
1546			break;
1547		}
1548		break;
1549	case 3:
1550		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1551		p->asc = 0x39;
1552		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1553		break;
1554error:
1555	case 1:
1556	case 2:
1557		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1558		p->asc = 0x24;
1559		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1560		break;
1561	}
1562	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1563	ahci_write_fis_d2h(p, slot, cfis, tfd);
1564}
1565
1566static void
1567atapi_get_event_status_notification(struct ahci_port *p, int slot,
1568    uint8_t *cfis)
1569{
1570	uint8_t *acmd;
1571	uint32_t tfd;
1572
1573	acmd = cfis + 0x40;
1574
1575	/* we don't support asynchronous operation */
1576	if (!(acmd[1] & 1)) {
1577		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1578		p->asc = 0x24;
1579		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1580	} else {
1581		uint8_t buf[8];
1582		int len;
1583
1584		len = be16dec(acmd + 7);
1585		if (len > sizeof(buf))
1586			len = sizeof(buf);
1587
1588		memset(buf, 0, sizeof(buf));
1589		be16enc(buf, 8 - 2);
1590		buf[2] = 0x04;
1591		buf[3] = 0x10;
1592		buf[5] = 0x02;
1593		write_prdt(p, slot, cfis, buf, len);
1594		tfd = ATA_S_READY | ATA_S_DSC;
1595	}
1596	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1597	ahci_write_fis_d2h(p, slot, cfis, tfd);
1598}
1599
1600static void
1601handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1602{
1603	uint8_t *acmd;
1604
1605	acmd = cfis + 0x40;
1606
1607#ifdef AHCI_DEBUG
1608	{
1609		int i;
1610		DPRINTF("ACMD:");
1611		for (i = 0; i < 16; i++)
1612			DPRINTF("%02x ", acmd[i]);
1613		DPRINTF("");
1614	}
1615#endif
1616
1617	switch (acmd[0]) {
1618	case TEST_UNIT_READY:
1619		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1620		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1621		break;
1622	case INQUIRY:
1623		atapi_inquiry(p, slot, cfis);
1624		break;
1625	case READ_CAPACITY:
1626		atapi_read_capacity(p, slot, cfis);
1627		break;
1628	case PREVENT_ALLOW:
1629		/* TODO */
1630		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1631		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1632		break;
1633	case READ_TOC:
1634		atapi_read_toc(p, slot, cfis);
1635		break;
1636	case REPORT_LUNS:
1637		atapi_report_luns(p, slot, cfis);
1638		break;
1639	case READ_10:
1640	case READ_12:
1641		atapi_read(p, slot, cfis, 0);
1642		break;
1643	case REQUEST_SENSE:
1644		atapi_request_sense(p, slot, cfis);
1645		break;
1646	case START_STOP_UNIT:
1647		atapi_start_stop_unit(p, slot, cfis);
1648		break;
1649	case MODE_SENSE_10:
1650		atapi_mode_sense(p, slot, cfis);
1651		break;
1652	case GET_EVENT_STATUS_NOTIFICATION:
1653		atapi_get_event_status_notification(p, slot, cfis);
1654		break;
1655	default:
1656		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1657		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1658		p->asc = 0x20;
1659		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1660				ATA_S_READY | ATA_S_ERROR);
1661		break;
1662	}
1663}
1664
1665static void
1666ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1667{
1668
1669	p->tfd |= ATA_S_BUSY;
1670	switch (cfis[2]) {
1671	case ATA_ATA_IDENTIFY:
1672		handle_identify(p, slot, cfis);
1673		break;
1674	case ATA_SETFEATURES:
1675	{
1676		switch (cfis[3]) {
1677		case ATA_SF_ENAB_SATA_SF:
1678			switch (cfis[12]) {
1679			case ATA_SATA_SF_AN:
1680				p->tfd = ATA_S_DSC | ATA_S_READY;
1681				break;
1682			default:
1683				p->tfd = ATA_S_ERROR | ATA_S_READY;
1684				p->tfd |= (ATA_ERROR_ABORT << 8);
1685				break;
1686			}
1687			break;
1688		case ATA_SF_ENAB_WCACHE:
1689		case ATA_SF_DIS_WCACHE:
1690		case ATA_SF_ENAB_RCACHE:
1691		case ATA_SF_DIS_RCACHE:
1692			p->tfd = ATA_S_DSC | ATA_S_READY;
1693			break;
1694		case ATA_SF_SETXFER:
1695		{
1696			switch (cfis[12] & 0xf8) {
1697			case ATA_PIO:
1698			case ATA_PIO0:
1699				break;
1700			case ATA_WDMA0:
1701			case ATA_UDMA0:
1702				p->xfermode = (cfis[12] & 0x7);
1703				break;
1704			}
1705			p->tfd = ATA_S_DSC | ATA_S_READY;
1706			break;
1707		}
1708		default:
1709			p->tfd = ATA_S_ERROR | ATA_S_READY;
1710			p->tfd |= (ATA_ERROR_ABORT << 8);
1711			break;
1712		}
1713		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1714		break;
1715	}
1716	case ATA_SET_MULTI:
1717		if (cfis[12] != 0 &&
1718			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1719			p->tfd = ATA_S_ERROR | ATA_S_READY;
1720			p->tfd |= (ATA_ERROR_ABORT << 8);
1721		} else {
1722			p->mult_sectors = cfis[12];
1723			p->tfd = ATA_S_DSC | ATA_S_READY;
1724		}
1725		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1726		break;
1727	case ATA_READ:
1728	case ATA_WRITE:
1729	case ATA_READ48:
1730	case ATA_WRITE48:
1731	case ATA_READ_MUL:
1732	case ATA_WRITE_MUL:
1733	case ATA_READ_MUL48:
1734	case ATA_WRITE_MUL48:
1735	case ATA_READ_DMA:
1736	case ATA_WRITE_DMA:
1737	case ATA_READ_DMA48:
1738	case ATA_WRITE_DMA48:
1739	case ATA_READ_FPDMA_QUEUED:
1740	case ATA_WRITE_FPDMA_QUEUED:
1741		ahci_handle_rw(p, slot, cfis, 0);
1742		break;
1743	case ATA_FLUSHCACHE:
1744	case ATA_FLUSHCACHE48:
1745		ahci_handle_flush(p, slot, cfis);
1746		break;
1747	case ATA_DATA_SET_MANAGEMENT:
1748		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1749		    cfis[13] == 0 && cfis[12] == 1) {
1750			ahci_handle_dsm_trim(p, slot, cfis, 0);
1751			break;
1752		}
1753		ahci_write_fis_d2h(p, slot, cfis,
1754		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1755		break;
1756	case ATA_SEND_FPDMA_QUEUED:
1757		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1758		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1759		    cfis[11] == 0 && cfis[3] == 1) {
1760			ahci_handle_dsm_trim(p, slot, cfis, 0);
1761			break;
1762		}
1763		ahci_write_fis_d2h(p, slot, cfis,
1764		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1765		break;
1766	case ATA_READ_LOG_EXT:
1767	case ATA_READ_LOG_DMA_EXT:
1768		ahci_handle_read_log(p, slot, cfis);
1769		break;
1770	case ATA_SECURITY_FREEZE_LOCK:
1771	case ATA_SMART_CMD:
1772	case ATA_NOP:
1773		ahci_write_fis_d2h(p, slot, cfis,
1774		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1775		break;
1776	case ATA_CHECK_POWER_MODE:
1777		cfis[12] = 0xff;	/* always on */
1778		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1779		break;
1780	case ATA_STANDBY_CMD:
1781	case ATA_STANDBY_IMMEDIATE:
1782	case ATA_IDLE_CMD:
1783	case ATA_IDLE_IMMEDIATE:
1784	case ATA_SLEEP:
1785	case ATA_READ_VERIFY:
1786	case ATA_READ_VERIFY48:
1787		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1788		break;
1789	case ATA_ATAPI_IDENTIFY:
1790		handle_atapi_identify(p, slot, cfis);
1791		break;
1792	case ATA_PACKET_CMD:
1793		if (!p->atapi) {
1794			ahci_write_fis_d2h(p, slot, cfis,
1795			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1796		} else
1797			handle_packet_cmd(p, slot, cfis);
1798		break;
1799	default:
1800		WPRINTF("Unsupported cmd:%02x", cfis[2]);
1801		ahci_write_fis_d2h(p, slot, cfis,
1802		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1803		break;
1804	}
1805}
1806
1807static void
1808ahci_handle_slot(struct ahci_port *p, int slot)
1809{
1810	struct ahci_cmd_hdr *hdr;
1811#ifdef AHCI_DEBUG
1812	struct ahci_prdt_entry *prdt;
1813#endif
1814	struct pci_ahci_softc *sc;
1815	uint8_t *cfis;
1816#ifdef AHCI_DEBUG
1817	int cfl, i;
1818#endif
1819
1820	sc = p->pr_sc;
1821	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1822#ifdef AHCI_DEBUG
1823	cfl = (hdr->flags & 0x1f) * 4;
1824#endif
1825	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1826			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1827#ifdef AHCI_DEBUG
1828	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1829
1830	DPRINTF("cfis:");
1831	for (i = 0; i < cfl; i++) {
1832		if (i % 10 == 0)
1833			DPRINTF("");
1834		DPRINTF("%02x ", cfis[i]);
1835	}
1836	DPRINTF("");
1837
1838	for (i = 0; i < hdr->prdtl; i++) {
1839		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1840		prdt++;
1841	}
1842#endif
1843
1844	if (cfis[0] != FIS_TYPE_REGH2D) {
1845		WPRINTF("Not a H2D FIS:%02x", cfis[0]);
1846		return;
1847	}
1848
1849	if (cfis[1] & 0x80) {
1850		ahci_handle_cmd(p, slot, cfis);
1851	} else {
1852		if (cfis[15] & (1 << 2))
1853			p->reset = 1;
1854		else if (p->reset) {
1855			p->reset = 0;
1856			ahci_port_reset(p);
1857		}
1858		p->ci &= ~(1 << slot);
1859	}
1860}
1861
1862static void
1863ahci_handle_port(struct ahci_port *p)
1864{
1865
1866	if (!(p->cmd & AHCI_P_CMD_ST))
1867		return;
1868
1869	/*
1870	 * Search for any new commands to issue ignoring those that
1871	 * are already in-flight.  Stop if device is busy or in error.
1872	 */
1873	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1874		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1875			break;
1876		if (p->waitforclear)
1877			break;
1878		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1879			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1880			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1881			ahci_handle_slot(p, p->ccs);
1882		}
1883	}
1884}
1885
1886/*
1887 * blockif callback routine - this runs in the context of the blockif
1888 * i/o thread, so the mutex needs to be acquired.
1889 */
1890static void
1891ata_ioreq_cb(struct blockif_req *br, int err)
1892{
1893	struct ahci_cmd_hdr *hdr;
1894	struct ahci_ioreq *aior;
1895	struct ahci_port *p;
1896	struct pci_ahci_softc *sc;
1897	uint32_t tfd;
1898	uint8_t *cfis;
1899	int slot, ncq, dsm;
1900
1901	DPRINTF("%s %d", __func__, err);
1902
1903	ncq = dsm = 0;
1904	aior = br->br_param;
1905	p = aior->io_pr;
1906	cfis = aior->cfis;
1907	slot = aior->slot;
1908	sc = p->pr_sc;
1909	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1910
1911	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1912	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1913	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1914		ncq = 1;
1915	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1916	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1917	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1918		dsm = 1;
1919
1920	pthread_mutex_lock(&sc->mtx);
1921
1922	/*
1923	 * Delete the blockif request from the busy list
1924	 */
1925	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1926
1927	/*
1928	 * Move the blockif request back to the free list
1929	 */
1930	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1931
1932	if (!err)
1933		hdr->prdbc = aior->done;
1934
1935	if (!err && aior->more) {
1936		if (dsm)
1937			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1938		else
1939			ahci_handle_rw(p, slot, cfis, aior->done);
1940		goto out;
1941	}
1942
1943	if (!err)
1944		tfd = ATA_S_READY | ATA_S_DSC;
1945	else
1946		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1947	if (ncq)
1948		ahci_write_fis_sdb(p, slot, cfis, tfd);
1949	else
1950		ahci_write_fis_d2h(p, slot, cfis, tfd);
1951
1952	/*
1953	 * This command is now complete.
1954	 */
1955	p->pending &= ~(1 << slot);
1956
1957	ahci_check_stopped(p);
1958	ahci_handle_port(p);
1959out:
1960	pthread_mutex_unlock(&sc->mtx);
1961	DPRINTF("%s exit", __func__);
1962}
1963
1964static void
1965atapi_ioreq_cb(struct blockif_req *br, int err)
1966{
1967	struct ahci_cmd_hdr *hdr;
1968	struct ahci_ioreq *aior;
1969	struct ahci_port *p;
1970	struct pci_ahci_softc *sc;
1971	uint8_t *cfis;
1972	uint32_t tfd;
1973	int slot;
1974
1975	DPRINTF("%s %d", __func__, err);
1976
1977	aior = br->br_param;
1978	p = aior->io_pr;
1979	cfis = aior->cfis;
1980	slot = aior->slot;
1981	sc = p->pr_sc;
1982	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1983
1984	pthread_mutex_lock(&sc->mtx);
1985
1986	/*
1987	 * Delete the blockif request from the busy list
1988	 */
1989	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1990
1991	/*
1992	 * Move the blockif request back to the free list
1993	 */
1994	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1995
1996	if (!err)
1997		hdr->prdbc = aior->done;
1998
1999	if (!err && aior->more) {
2000		atapi_read(p, slot, cfis, aior->done);
2001		goto out;
2002	}
2003
2004	if (!err) {
2005		tfd = ATA_S_READY | ATA_S_DSC;
2006	} else {
2007		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2008		p->asc = 0x21;
2009		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2010	}
2011	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2012	ahci_write_fis_d2h(p, slot, cfis, tfd);
2013
2014	/*
2015	 * This command is now complete.
2016	 */
2017	p->pending &= ~(1 << slot);
2018
2019	ahci_check_stopped(p);
2020	ahci_handle_port(p);
2021out:
2022	pthread_mutex_unlock(&sc->mtx);
2023	DPRINTF("%s exit", __func__);
2024}
2025
2026static void
2027pci_ahci_ioreq_init(struct ahci_port *pr)
2028{
2029	struct ahci_ioreq *vr;
2030	int i;
2031
2032	pr->ioqsz = blockif_queuesz(pr->bctx);
2033	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2034	STAILQ_INIT(&pr->iofhd);
2035
2036	/*
2037	 * Add all i/o request entries to the free queue
2038	 */
2039	for (i = 0; i < pr->ioqsz; i++) {
2040		vr = &pr->ioreq[i];
2041		vr->io_pr = pr;
2042		if (!pr->atapi)
2043			vr->io_req.br_callback = ata_ioreq_cb;
2044		else
2045			vr->io_req.br_callback = atapi_ioreq_cb;
2046		vr->io_req.br_param = vr;
2047		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2048	}
2049
2050	TAILQ_INIT(&pr->iobhd);
2051}
2052
/*
 * Handle a guest write to a per-port register.
 *
 * sc     - controller state
 * offset - register offset within the BAR; the port number and the offset
 *          inside that port's register block are derived from it
 * value  - value written by the guest
 *
 * Writes to the read-only registers (TFD, SIG, SSTS) are logged and
 * dropped; SNTF, FBS and unknown offsets are silently ignored.
 */
static void
pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
{
	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
	struct ahci_port *p = &sc->port[port];

	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
		port, offset, value);

	switch (offset) {
	case AHCI_P_CLB:
		p->clb = value;
		break;
	case AHCI_P_CLBU:
		p->clbu = value;
		break;
	case AHCI_P_FB:
		p->fb = value;
		break;
	case AHCI_P_FBU:
		p->fbu = value;
		break;
	case AHCI_P_IS:
		/* Bits written as 1 are cleared. */
		p->is &= ~value;
		ahci_port_intr(p);
		break;
	case AHCI_P_IE:
		/* Only the bits in the mask below can be set. */
		p->ie = value & 0xFDC000FF;
		ahci_port_intr(p);
		break;
	case AHCI_P_CMD:
	{
		/* Replace the guest-writable bits of PxCMD with the new ones. */
		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;

		if (!(value & AHCI_P_CMD_ST)) {
			ahci_port_stop(p);
		} else {
			uint64_t clb;

			/*
			 * Port started: flag the command list as running and
			 * map it from the 64-bit guest address in CLBU:CLB.
			 */
			p->cmd |= AHCI_P_CMD_CR;
			clb = (uint64_t)p->clbu << 32 | p->clb;
			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
		}

		if (value & AHCI_P_CMD_FRE) {
			uint64_t fb;

			/*
			 * FIS receive enabled: flag it as running and map the
			 * receive area from the guest address in FBU:FB.
			 */
			p->cmd |= AHCI_P_CMD_FR;
			fb = (uint64_t)p->fbu << 32 | p->fb;
			/* we don't support FBSCP, so rfis size is 256Bytes */
			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
		} else {
			p->cmd &= ~AHCI_P_CMD_FR;
		}

		/* CLO clears BUSY and DRQ in the task file, then itself. */
		if (value & AHCI_P_CMD_CLO) {
			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
			p->cmd &= ~AHCI_P_CMD_CLO;
		}

		/* Any ICC value written is cleared again immediately. */
		if (value & AHCI_P_CMD_ICC_MASK) {
			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
		}

		/* The new state may allow queued commands to be issued. */
		ahci_handle_port(p);
		break;
	}
	case AHCI_P_TFD:
	case AHCI_P_SIG:
	case AHCI_P_SSTS:
		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
		break;
	case AHCI_P_SCTL:
		p->sctl = value;
		/* A reset request is only honoured while the port is stopped. */
		if (!(p->cmd & AHCI_P_CMD_ST)) {
			if (value & ATA_SC_DET_RESET)
				ahci_port_reset(p);
		}
		break;
	case AHCI_P_SERR:
		/* Bits written as 1 are cleared. */
		p->serr &= ~value;
		break;
	case AHCI_P_SACT:
		/* The guest can only set bits here. */
		p->sact |= value;
		break;
	case AHCI_P_CI:
		/* Newly issued slots are added, then the port is polled. */
		p->ci |= value;
		ahci_handle_port(p);
		break;
	case AHCI_P_SNTF:
	case AHCI_P_FBS:
	default:
		break;
	}
}
2157
2158static void
2159pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2160{
2161	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2162		offset, value);
2163
2164	switch (offset) {
2165	case AHCI_CAP:
2166	case AHCI_PI:
2167	case AHCI_VS:
2168	case AHCI_CAP2:
2169		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2170		break;
2171	case AHCI_GHC:
2172		if (value & AHCI_GHC_HR) {
2173			ahci_reset(sc);
2174			break;
2175		}
2176		if (value & AHCI_GHC_IE)
2177			sc->ghc |= AHCI_GHC_IE;
2178		else
2179			sc->ghc &= ~AHCI_GHC_IE;
2180		ahci_generate_intr(sc, 0xffffffff);
2181		break;
2182	case AHCI_IS:
2183		sc->is &= ~value;
2184		ahci_generate_intr(sc, value);
2185		break;
2186	default:
2187		break;
2188	}
2189}
2190
2191static void
2192pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2193		int baridx, uint64_t offset, int size, uint64_t value)
2194{
2195	struct pci_ahci_softc *sc = pi->pi_arg;
2196
2197	assert(baridx == 5);
2198	assert((offset % 4) == 0 && size == 4);
2199
2200	pthread_mutex_lock(&sc->mtx);
2201
2202	if (offset < AHCI_OFFSET)
2203		pci_ahci_host_write(sc, offset, value);
2204	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2205		pci_ahci_port_write(sc, offset, value);
2206	else
2207		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2208
2209	pthread_mutex_unlock(&sc->mtx);
2210}
2211
2212static uint64_t
2213pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2214{
2215	uint32_t value;
2216
2217	switch (offset) {
2218	case AHCI_CAP:
2219	case AHCI_GHC:
2220	case AHCI_IS:
2221	case AHCI_PI:
2222	case AHCI_VS:
2223	case AHCI_CCCC:
2224	case AHCI_CCCP:
2225	case AHCI_EM_LOC:
2226	case AHCI_EM_CTL:
2227	case AHCI_CAP2:
2228	{
2229		uint32_t *p = &sc->cap;
2230		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2231		value = *p;
2232		break;
2233	}
2234	default:
2235		value = 0;
2236		break;
2237	}
2238	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2239		offset, value);
2240
2241	return (value);
2242}
2243
2244static uint64_t
2245pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2246{
2247	uint32_t value;
2248	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2249	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2250
2251	switch (offset) {
2252	case AHCI_P_CLB:
2253	case AHCI_P_CLBU:
2254	case AHCI_P_FB:
2255	case AHCI_P_FBU:
2256	case AHCI_P_IS:
2257	case AHCI_P_IE:
2258	case AHCI_P_CMD:
2259	case AHCI_P_TFD:
2260	case AHCI_P_SIG:
2261	case AHCI_P_SSTS:
2262	case AHCI_P_SCTL:
2263	case AHCI_P_SERR:
2264	case AHCI_P_SACT:
2265	case AHCI_P_CI:
2266	case AHCI_P_SNTF:
2267	case AHCI_P_FBS:
2268	{
2269		uint32_t *p= &sc->port[port].clb;
2270		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2271		value = *p;
2272		break;
2273	}
2274	default:
2275		value = 0;
2276		break;
2277	}
2278
2279	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2280		port, offset, value);
2281
2282	return value;
2283}
2284
2285static uint64_t
2286pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2287    uint64_t regoff, int size)
2288{
2289	struct pci_ahci_softc *sc = pi->pi_arg;
2290	uint64_t offset;
2291	uint32_t value;
2292
2293	assert(baridx == 5);
2294	assert(size == 1 || size == 2 || size == 4);
2295	assert((regoff & (size - 1)) == 0);
2296
2297	pthread_mutex_lock(&sc->mtx);
2298
2299	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2300	if (offset < AHCI_OFFSET)
2301		value = pci_ahci_host_read(sc, offset);
2302	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2303		value = pci_ahci_port_read(sc, offset);
2304	else {
2305		value = 0;
2306		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2307		    regoff);
2308	}
2309	value >>= 8 * (regoff & 0x3);
2310
2311	pthread_mutex_unlock(&sc->mtx);
2312
2313	return (value);
2314}
2315
2316static int
2317pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2318{
2319	char bident[sizeof("XX:XX:XX")];
2320	struct blockif_ctxt *bctxt;
2321	struct pci_ahci_softc *sc;
2322	int ret, slots, p;
2323	MD5_CTX mdctx;
2324	u_char digest[16];
2325	char *next, *next2;
2326	char *bopt, *uopt, *xopts, *config;
2327	FILE* fp;
2328	size_t block_len;
2329	int comma, optpos;
2330
2331	ret = 0;
2332
2333#ifdef AHCI_DEBUG
2334	dbg = fopen("/tmp/log", "w+");
2335#endif
2336
2337	sc = calloc(1, sizeof(struct pci_ahci_softc));
2338	pi->pi_arg = sc;
2339	sc->asc_pi = pi;
2340	pthread_mutex_init(&sc->mtx, NULL);
2341	sc->ports = 0;
2342	sc->pi = 0;
2343	slots = 32;
2344
2345	for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) {
2346		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2347		memset(ata_ident, 0, sizeof(struct ata_params));
2348
2349		/* Identify and cut off type of present port. */
2350		if (strncmp(opts, "hd:", 3) == 0) {
2351			atapi = 0;
2352			opts += 3;
2353		} else if (strncmp(opts, "cd:", 3) == 0) {
2354			atapi = 1;
2355			opts += 3;
2356		}
2357
2358		/* Find and cut off the next port options. */
2359		next = strstr(opts, ",hd:");
2360		next2 = strstr(opts, ",cd:");
2361		if (next == NULL || (next2 != NULL && next2 < next))
2362			next = next2;
2363		if (next != NULL) {
2364			next[0] = 0;
2365			next++;
2366		}
2367
2368		if (opts[0] == 0)
2369			continue;
2370
2371		uopt = strdup(opts);
2372		bopt = NULL;
2373		fp = open_memstream(&bopt, &block_len);
2374		comma = 0;
2375		optpos = 0;
2376
2377		for (xopts = strtok(uopt, ",");
2378		     xopts != NULL;
2379		     xopts = strtok(NULL, ",")) {
2380
2381			/* First option assume as block filename. */
2382			if (optpos == 0) {
2383				/*
2384				 * Create an identifier for the backing file.
2385				 * Use parts of the md5 sum of the filename
2386				 */
2387				char ident[AHCI_PORT_IDENT];
2388				MD5Init(&mdctx);
2389				MD5Update(&mdctx, opts, strlen(opts));
2390				MD5Final(digest, &mdctx);
2391				snprintf(ident, AHCI_PORT_IDENT,
2392					"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2393					digest[0], digest[1], digest[2], digest[3], digest[4],
2394					digest[5]);
2395				ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2396				ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2397				if (atapi) {
2398					ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2399				}
2400				else {
2401					ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2402				}
2403			}
2404
2405			if ((config = strchr(xopts, '=')) != NULL) {
2406				*config++ = '\0';
2407				if (!strcmp("nmrr", xopts)) {
2408					ata_ident->media_rotation_rate = atoi(config);
2409				}
2410				else if (!strcmp("ser", xopts)) {
2411					ata_string((uint8_t*)(&ata_ident->serial), config, 20);
2412				}
2413				else if (!strcmp("rev", xopts)) {
2414					ata_string((uint8_t*)(&ata_ident->revision), config, 8);
2415				}
2416				else if (!strcmp("model", xopts)) {
2417					ata_string((uint8_t*)(&ata_ident->model), config, 40);
2418				}
2419				else {
2420					/* Pass all other options to blockif_open. */
2421					*--config = '=';
2422					fprintf(fp, "%s%s", comma ? "," : "", xopts);
2423					comma = 1;
2424				}
2425			}
2426			else {
2427				/* Pass all other options to blockif_open. */
2428				fprintf(fp, "%s%s", comma ? "," : "", xopts);
2429				comma = 1;
2430			}
2431			optpos++;
2432		}
2433		free(uopt);
2434		fclose(fp);
2435
2436		DPRINTF("%s\n", bopt);
2437
2438		/*
2439		 * Attempt to open the backing image. Use the PCI slot/func
2440		 * and the port number for the identifier string.
2441		 */
2442		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
2443		    pi->pi_func, p);
2444		bctxt = blockif_open(bopt, bident);
2445		free(bopt);
2446
2447		if (bctxt == NULL) {
2448			sc->ports = p;
2449			ret = 1;
2450			goto open_fail;
2451		}
2452		sc->port[p].bctx = bctxt;
2453		sc->port[p].pr_sc = sc;
2454		sc->port[p].port = p;
2455		sc->port[p].atapi = atapi;
2456
2457		ata_identify_init(&sc->port[p], atapi);
2458
2459		/*
2460		 * Allocate blockif request structures and add them
2461		 * to the free list
2462		 */
2463		pci_ahci_ioreq_init(&sc->port[p]);
2464
2465		sc->pi |= (1 << p);
2466		if (sc->port[p].ioqsz < slots)
2467			slots = sc->port[p].ioqsz;
2468	}
2469	sc->ports = p;
2470
2471	/* Intel ICH8 AHCI */
2472	--slots;
2473	if (sc->ports < DEF_PORTS)
2474		sc->ports = DEF_PORTS;
2475	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2476	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2477	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2478	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2479	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2480
2481	sc->vs = 0x10300;
2482	sc->cap2 = AHCI_CAP2_APST;
2483	ahci_reset(sc);
2484
2485	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2486	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2487	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2488	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2489	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2490	p = MIN(sc->ports, 16);
2491	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2492	pci_emul_add_msicap(pi, 1 << p);
2493	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2494	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2495
2496	pci_lintr_request(pi);
2497
2498open_fail:
2499	if (ret) {
2500		for (p = 0; p < sc->ports; p++) {
2501			if (sc->port[p].bctx != NULL)
2502				blockif_close(sc->port[p].bctx);
2503		}
2504		free(sc);
2505	}
2506
2507	return (ret);
2508}
2509
/*
 * pe_init callback used by the hard-disk flavoured emulations: hands the
 * arguments to pci_ahci_init() with the atapi argument set to 0 and
 * returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2516
/*
 * pe_init callback used by the "ahci-cd" emulation: hands the arguments
 * to pci_ahci_init() with the atapi argument set to 1 and returns
 * whatever pci_ahci_init() returns.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2523
2524#ifdef BHYVE_SNAPSHOT
2525static int
2526pci_ahci_snapshot_save_queues(struct ahci_port *port,
2527			      struct vm_snapshot_meta *meta)
2528{
2529	int ret;
2530	int idx;
2531	struct ahci_ioreq *ioreq;
2532
2533	STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
2534		idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
2535		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2536	}
2537
2538	idx = -1;
2539	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2540
2541	TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
2542		idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
2543		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2544
2545		/*
2546		 * Snapshot only the busy requests; other requests are
2547		 * not valid.
2548		 */
2549		ret = blockif_snapshot_req(&ioreq->io_req, meta);
2550		if (ret != 0) {
2551			fprintf(stderr, "%s: failed to snapshot req\r\n",
2552				__func__);
2553			goto done;
2554		}
2555	}
2556
2557	idx = -1;
2558	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2559
2560done:
2561	return (ret);
2562}
2563
2564static int
2565pci_ahci_snapshot_restore_queues(struct ahci_port *port,
2566				 struct vm_snapshot_meta *meta)
2567{
2568	int ret;
2569	int idx;
2570	struct ahci_ioreq *ioreq;
2571
2572	/* Empty the free queue before restoring. */
2573	while (!STAILQ_EMPTY(&port->iofhd))
2574		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
2575
2576	/* Restore the free queue. */
2577	while (1) {
2578		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2579		if (idx == -1)
2580			break;
2581
2582		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
2583	}
2584
2585	/* Restore the busy queue. */
2586	while (1) {
2587		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2588		if (idx == -1)
2589			break;
2590
2591		ioreq = &port->ioreq[idx];
2592		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
2593
2594		/*
2595		 * Restore only the busy requests; other requests are
2596		 * not valid.
2597		 */
2598		ret = blockif_snapshot_req(&ioreq->io_req, meta);
2599		if (ret != 0) {
2600			fprintf(stderr, "%s: failed to restore request\r\n",
2601				__func__);
2602			goto done;
2603		}
2604
2605		/* Re-enqueue the requests in the block interface. */
2606		if (ioreq->readop)
2607			ret = blockif_read(port->bctx, &ioreq->io_req);
2608		else
2609			ret = blockif_write(port->bctx, &ioreq->io_req);
2610
2611		if (ret != 0) {
2612			fprintf(stderr,
2613				"%s: failed to re-enqueue request\r\n",
2614				__func__);
2615			goto done;
2616		}
2617	}
2618
2619done:
2620	return (ret);
2621}
2622
2623static int
2624pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2625{
2626	int i, j, ret;
2627	void *bctx;
2628	struct pci_devinst *pi;
2629	struct pci_ahci_softc *sc;
2630	struct ahci_port *port;
2631	struct ahci_cmd_hdr *hdr;
2632	struct ahci_ioreq *ioreq;
2633
2634	pi = meta->dev_data;
2635	sc = pi->pi_arg;
2636
2637	/* TODO: add mtx lock/unlock */
2638
2639	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2640	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2641	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2642	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2643	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2644	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2645	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2646	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2647	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2648	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2649	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2650	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2651	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2652
2653	for (i = 0; i < MAX_PORTS; i++) {
2654		port = &sc->port[i];
2655
2656		if (meta->op == VM_SNAPSHOT_SAVE)
2657			bctx = port->bctx;
2658
2659		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2660		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2661
2662		/* Mostly for restore; save is ensured by the lines above. */
2663		if (((bctx == NULL) && (port->bctx != NULL)) ||
2664		    ((bctx != NULL) && (port->bctx == NULL))) {
2665			fprintf(stderr, "%s: ports not matching\r\n", __func__);
2666			ret = EINVAL;
2667			goto done;
2668		}
2669
2670		if (port->bctx == NULL)
2671			continue;
2672
2673		if (port->port != i) {
2674			fprintf(stderr, "%s: ports not matching: "
2675					"actual: %d expected: %d\r\n",
2676					__func__, port->port, i);
2677			ret = EINVAL;
2678			goto done;
2679		}
2680
2681		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
2682			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2683		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
2684			ret, done);
2685
2686		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2687		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2688		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2689		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2690		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2691		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2692		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2693		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2694		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2695		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2696		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2697
2698		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2699		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2700		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2701		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2702		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2703		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2704		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2705		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2706		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2707		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2708		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2709		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2710		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2711		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2712		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2713		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2714		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2715
2716		for (j = 0; j < port->ioqsz; j++) {
2717			ioreq = &port->ioreq[j];
2718
2719			/* blockif_req snapshot done only for busy requests. */
2720			hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
2721				ioreq->slot * AHCI_CL_SIZE);
2722			SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
2723				0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
2724				false, meta, ret, done);
2725
2726			SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
2727			SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
2728			SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
2729			SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
2730			SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
2731		}
2732
2733		/* Perform save / restore specific operations. */
2734		if (meta->op == VM_SNAPSHOT_SAVE) {
2735			ret = pci_ahci_snapshot_save_queues(port, meta);
2736			if (ret != 0)
2737				goto done;
2738		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
2739			ret = pci_ahci_snapshot_restore_queues(port, meta);
2740			if (ret != 0)
2741				goto done;
2742		} else {
2743			ret = EINVAL;
2744			goto done;
2745		}
2746
2747		ret = blockif_snapshot(port->bctx, meta);
2748		if (ret != 0) {
2749			fprintf(stderr, "%s: failed to restore blockif\r\n",
2750				__func__);
2751			goto done;
2752		}
2753	}
2754
2755done:
2756	return (ret);
2757}
2758
2759static int
2760pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
2761{
2762	struct pci_ahci_softc *sc;
2763	struct blockif_ctxt *bctxt;
2764	int i;
2765
2766	sc = pi->pi_arg;
2767
2768	for (i = 0; i < MAX_PORTS; i++) {
2769		bctxt = sc->port[i].bctx;
2770		if (bctxt == NULL)
2771			continue;
2772
2773		blockif_pause(bctxt);
2774	}
2775
2776	return (0);
2777}
2778
2779static int
2780pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
2781{
2782	struct pci_ahci_softc *sc;
2783	struct blockif_ctxt *bctxt;
2784	int i;
2785
2786	sc = pi->pi_arg;
2787
2788	for (i = 0; i < MAX_PORTS; i++) {
2789		bctxt = sc->port[i].bctx;
2790		if (bctxt == NULL)
2791			continue;
2792
2793		blockif_resume(bctxt);
2794	}
2795
2796	return (0);
2797}
2798#endif
2799
2800/*
2801 * Use separate emulation names to distinguish drive and atapi devices
2802 */
2803struct pci_devemu pci_de_ahci = {
2804	.pe_emu =	"ahci",
2805	.pe_init =	pci_ahci_hd_init,
2806	.pe_barwrite =	pci_ahci_write,
2807	.pe_barread =	pci_ahci_read,
2808#ifdef BHYVE_SNAPSHOT
2809	.pe_snapshot =	pci_ahci_snapshot,
2810	.pe_pause =	pci_ahci_pause,
2811	.pe_resume =	pci_ahci_resume,
2812#endif
2813};
2814PCI_EMUL_SET(pci_de_ahci);
2815
2816struct pci_devemu pci_de_ahci_hd = {
2817	.pe_emu =	"ahci-hd",
2818	.pe_init =	pci_ahci_hd_init,
2819	.pe_barwrite =	pci_ahci_write,
2820	.pe_barread =	pci_ahci_read,
2821#ifdef BHYVE_SNAPSHOT
2822	.pe_snapshot =	pci_ahci_snapshot,
2823	.pe_pause =	pci_ahci_pause,
2824	.pe_resume =	pci_ahci_resume,
2825#endif
2826};
2827PCI_EMUL_SET(pci_de_ahci_hd);
2828
2829struct pci_devemu pci_de_ahci_cd = {
2830	.pe_emu =	"ahci-cd",
2831	.pe_init =	pci_ahci_atapi_init,
2832	.pe_barwrite =	pci_ahci_write,
2833	.pe_barread =	pci_ahci_read,
2834#ifdef BHYVE_SNAPSHOT
2835	.pe_snapshot =	pci_ahci_snapshot,
2836	.pe_pause =	pci_ahci_pause,
2837	.pe_resume =	pci_ahci_resume,
2838#endif
2839};
2840PCI_EMUL_SET(pci_de_ahci_cd);
2841