1230557Sjimharris/*-
2230557Sjimharris * Copyright (c) 2015 Netflix, Inc
3230557Sjimharris * All rights reserved.
4230557Sjimharris *
5230557Sjimharris * Redistribution and use in source and binary forms, with or without
6230557Sjimharris * modification, are permitted provided that the following conditions
7230557Sjimharris * are met:
8230557Sjimharris * 1. Redistributions of source code must retain the above copyright
9230557Sjimharris *    notice, this list of conditions and the following disclaimer,
10230557Sjimharris *    without modification, immediately at the beginning of the file.
11230557Sjimharris * 2. Redistributions in binary form must reproduce the above copyright
12230557Sjimharris *    notice, this list of conditions and the following disclaimer in the
13230557Sjimharris *    documentation and/or other materials provided with the distribution.
14230557Sjimharris *
15230557Sjimharris * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16230557Sjimharris * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17230557Sjimharris * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18230557Sjimharris * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19230557Sjimharris * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20230557Sjimharris * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21230557Sjimharris * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22230557Sjimharris * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23230557Sjimharris * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24230557Sjimharris * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25230557Sjimharris *
26230557Sjimharris * Derived from ata_da.c:
27230557Sjimharris * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
28230557Sjimharris */
29230557Sjimharris
30230557Sjimharris#include <sys/cdefs.h>
31230557Sjimharris__FBSDID("$FreeBSD: releng/11.0/sys/cam/nvme/nvme_da.c 301771 2016-06-09 22:39:02Z imp $");
32230557Sjimharris
33230557Sjimharris#include <sys/param.h>
34230557Sjimharris
35230557Sjimharris#ifdef _KERNEL
36230557Sjimharris#include <sys/systm.h>
37230557Sjimharris#include <sys/kernel.h>
38230557Sjimharris#include <sys/bio.h>
39230557Sjimharris#include <sys/sysctl.h>
40230557Sjimharris#include <sys/taskqueue.h>
41230557Sjimharris#include <sys/lock.h>
42230557Sjimharris#include <sys/mutex.h>
43230557Sjimharris#include <sys/conf.h>
44230557Sjimharris#include <sys/devicestat.h>
45230557Sjimharris#include <sys/eventhandler.h>
46230557Sjimharris#include <sys/malloc.h>
47230557Sjimharris#include <sys/cons.h>
48230557Sjimharris#include <sys/proc.h>
49230557Sjimharris#include <sys/reboot.h>
50230557Sjimharris#include <geom/geom_disk.h>
51230557Sjimharris#endif /* _KERNEL */
52230557Sjimharris
53230557Sjimharris#ifndef _KERNEL
54230557Sjimharris#include <stdio.h>
55230557Sjimharris#include <string.h>
56230557Sjimharris#endif /* _KERNEL */
57230557Sjimharris
58230557Sjimharris#include <cam/cam.h>
59230557Sjimharris#include <cam/cam_ccb.h>
60230557Sjimharris#include <cam/cam_periph.h>
61230557Sjimharris#include <cam/cam_xpt_periph.h>
62230557Sjimharris#include <cam/cam_sim.h>
63230557Sjimharris#include <cam/cam_iosched.h>
64230557Sjimharris
65230557Sjimharris#include <cam/nvme/nvme_all.h>
66230557Sjimharris
/*
 * Periph state stored in nda_softc.state.  Only the steady "normal" state
 * exists; ndaschedule() refuses to schedule work in any other state.
 */
typedef enum {
	NDA_STATE_NORMAL,
} nda_state;
70230557Sjimharris
/* Bit flags stored in nda_softc.flags. */
typedef enum {
	NDA_FLAG_OPEN      = 0x0001,	/* set by ndaopen, cleared by ndaclose */
	NDA_FLAG_DIRTY     = 0x0002,	/* ndaclose issues a flush while set */
	NDA_FLAG_SCTX_INIT = 0x0004,	/* sysctl context has been initialized */
} nda_flags;
76230557Sjimharris
/* Per-device quirk bits stored in nda_softc.quirks. */
typedef enum {
	NDA_Q_NONE = 0x00,	/* no quirks */
	NDA_Q_4K   = 0x01,
} nda_quirks;
81230557Sjimharris
/*
 * Bit-name description string for the quirks word.
 * NOTE(review): presumably consumed by a "%b" kernel printf; the consumer
 * is not in this part of the file -- confirm.
 */
#define	NDA_Q_BIT_STRING		\
	"\020"				\
	"\001Bit 0"
85230557Sjimharris
/*
 * CCB type tag stored in the CCB's private ccb_state field.
 * NOTE(review): NDA_CCB_TYPE_MASK presumably isolates the type from other
 * bits sharing the field; its user is outside this part of the file.
 */
typedef enum {
	NDA_CCB_BUFFER_IO = 0x01,
	NDA_CCB_DUMP      = 0x02,	/* set by ndadump on its polled CCBs */
	NDA_CCB_TRIM      = 0x03,
	NDA_CCB_TYPE_MASK = 0x0F,
} nda_ccb_state;
92230557Sjimharris
/*
 * Aliases for the peripheral-private fields of a CCB header that this
 * driver uses to stash per-request information.
 */
#define	ccb_state	ppriv_field0
#define	ccb_bp		ppriv_ptr1
96230557Sjimharris
97230557Sjimharrisstruct trim_request {
98230557Sjimharris	TAILQ_HEAD(, bio) bps;
99230557Sjimharris};
100230557Sjimharrisstruct nda_softc {
101230557Sjimharris	struct   cam_iosched_softc *cam_iosched;
102230557Sjimharris	int	 outstanding_cmds;	/* Number of active commands */
103230557Sjimharris	int	 refcount;		/* Active xpt_action() calls */
104230557Sjimharris	nda_state state;
105230557Sjimharris	nda_flags flags;
106230557Sjimharris	nda_quirks quirks;
107230557Sjimharris	int	 unmappedio;
108230557Sjimharris	uint32_t  nsid;			/* Namespace ID for this nda device */
109230557Sjimharris	struct disk *disk;
110230557Sjimharris	struct task		sysctl_task;
111230557Sjimharris	struct sysctl_ctx_list	sysctl_ctx;
112230557Sjimharris	struct sysctl_oid	*sysctl_tree;
113230557Sjimharris	struct trim_request	trim_req;
114230557Sjimharris#ifdef CAM_IO_STATS
115230557Sjimharris	struct sysctl_ctx_list	sysctl_stats_ctx;
116230557Sjimharris	struct sysctl_oid	*sysctl_stats_tree;
117230557Sjimharris	u_int	timeouts;
118230557Sjimharris	u_int	errors;
119230557Sjimharris	u_int	invalidations;
120230557Sjimharris#endif
121230557Sjimharris};
122230557Sjimharris
123230557Sjimharris/* Need quirk table */
124230557Sjimharris
125230557Sjimharrisstatic	disk_strategy_t	ndastrategy;
126230557Sjimharrisstatic	dumper_t	ndadump;
127230557Sjimharrisstatic	periph_init_t	ndainit;
128230557Sjimharrisstatic	void		ndaasync(void *callback_arg, u_int32_t code,
129230557Sjimharris				struct cam_path *path, void *arg);
130230557Sjimharrisstatic	void		ndasysctlinit(void *context, int pending);
131230557Sjimharrisstatic	periph_ctor_t	ndaregister;
132230557Sjimharrisstatic	periph_dtor_t	ndacleanup;
133230557Sjimharrisstatic	periph_start_t	ndastart;
134230557Sjimharrisstatic	periph_oninv_t	ndaoninvalidate;
135230557Sjimharrisstatic	void		ndadone(struct cam_periph *periph,
136230557Sjimharris			       union ccb *done_ccb);
137230557Sjimharrisstatic  int		ndaerror(union ccb *ccb, u_int32_t cam_flags,
138230557Sjimharris				u_int32_t sense_flags);
139230557Sjimharrisstatic void		ndashutdown(void *arg, int howto);
140230557Sjimharrisstatic void		ndasuspend(void *arg);
141230557Sjimharris
/* Compile-time defaults; each may be pre-defined by the build. */
#ifndef NDA_DEFAULT_SEND_ORDERED
#define	NDA_DEFAULT_SEND_ORDERED	1
#endif

#ifndef NDA_DEFAULT_TIMEOUT
#define	NDA_DEFAULT_TIMEOUT		30	/* Timeout in seconds */
#endif

#ifndef NDA_DEFAULT_RETRY
#define	NDA_DEFAULT_RETRY		4
#endif

/* Not wired up yet: static int nda_retry_count = NDA_DEFAULT_RETRY; */

/* Gates registration of the suspend/shutdown handlers in ndainit(). */
static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;

/* Command timeout in seconds; multiplied by 1000 when a CCB is filled. */
static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;
156230557Sjimharris
/*
 * NVMe media never rotates, so a single shared zero backs the per-unit
 * "rotating" sysctl of every nda instance.
 */
static int nda_rotating_media = 0;
162230557Sjimharris
163230557Sjimharrisstatic SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0,
164230557Sjimharris            "CAM Direct Access Disk driver");
165230557Sjimharris
166230557Sjimharrisstatic struct periph_driver ndadriver =
167230557Sjimharris{
168234106Sjimharris	ndainit, "nda",
169230557Sjimharris	TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0
170230557Sjimharris};
171230557Sjimharris
172230557SjimharrisPERIPHDRIVER_DECLARE(nda, ndadriver);
173230557Sjimharris
174230557Sjimharrisstatic MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers");
175230557Sjimharris
/*
 * Convenience wrappers for building NVMe I/O CCBs.  They arguably belong
 * in nvme_all.c rather than here, but this is the only place that uses
 * them.  Should we ever grow another NVMe periph, they should all be
 * moved there wholesale.
 */
182230557Sjimharris
183230557Sjimharrisstatic void
184230557Sjimharrisnda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio)
185230557Sjimharris{
186230557Sjimharris	cam_fill_nvmeio(nvmeio,
187230557Sjimharris	    0,			/* retries */
188230557Sjimharris	    ndadone,		/* cbfcnp */
189230557Sjimharris	    CAM_DIR_NONE,	/* flags */
190230557Sjimharris	    NULL,		/* data_ptr */
191230557Sjimharris	    0,			/* dxfer_len */
192234106Sjimharris	    nda_default_timeout * 1000); /* timeout 5s */
193230557Sjimharris	nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid);
194230557Sjimharris}
195230557Sjimharris
196230557Sjimharrisstatic void
197230557Sjimharrisnda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
198230557Sjimharris    void *payload, uint32_t num_ranges)
199230557Sjimharris{
200230557Sjimharris	cam_fill_nvmeio(nvmeio,
201230557Sjimharris	    0,			/* retries */
202230557Sjimharris	    ndadone,		/* cbfcnp */
203230557Sjimharris	    CAM_DIR_OUT,	/* flags */
204230557Sjimharris	    payload,		/* data_ptr */
205230557Sjimharris	    num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */
206230557Sjimharris	    nda_default_timeout * 1000); /* timeout 5s */
207230557Sjimharris	nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges);
208230557Sjimharris}
209230557Sjimharris
210230557Sjimharrisstatic void
211230557Sjimharrisnda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
212230557Sjimharris    void *payload, uint64_t lba, uint32_t len, uint32_t count)
213230557Sjimharris{
214230557Sjimharris	cam_fill_nvmeio(nvmeio,
215230557Sjimharris	    0,			/* retries */
216230557Sjimharris	    ndadone,		/* cbfcnp */
217230557Sjimharris	    CAM_DIR_OUT,	/* flags */
218230557Sjimharris	    payload,		/* data_ptr */
219230557Sjimharris	    len,		/* dxfer_len */
220230557Sjimharris	    nda_default_timeout * 1000); /* timeout 5s */
221230557Sjimharris	nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count);
222230557Sjimharris}
223230557Sjimharris
224234106Sjimharrisstatic void
225234106Sjimharrisnda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
226234106Sjimharris    struct bio *bp, uint32_t rwcmd)
227234106Sjimharris{
228234106Sjimharris	int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT;
229234106Sjimharris	void *payload;
230234106Sjimharris	uint64_t lba;
231234106Sjimharris	uint32_t count;
232234106Sjimharris
233234106Sjimharris	if (bp->bio_flags & BIO_UNMAPPED) {
234234106Sjimharris		flags |= CAM_DATA_BIO;
235234106Sjimharris		payload = bp;
236234106Sjimharris	} else {
237234106Sjimharris		payload = bp->bio_data;
238234106Sjimharris	}
239234106Sjimharris
240234106Sjimharris	lba = bp->bio_pblkno;
241234106Sjimharris	count = bp->bio_bcount / softc->disk->d_sectorsize;
242234106Sjimharris
243234106Sjimharris	cam_fill_nvmeio(nvmeio,
244234106Sjimharris	    0,			/* retries */
245234106Sjimharris	    ndadone,		/* cbfcnp */
246234106Sjimharris	    flags,		/* flags */
247234106Sjimharris	    payload,		/* data_ptr */
248234106Sjimharris	    bp->bio_bcount,	/* dxfer_len */
249234106Sjimharris	    nda_default_timeout * 1000);		/* timeout 5s */
250234106Sjimharris	nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count);
251234106Sjimharris}
252234106Sjimharris
253234106Sjimharrisstatic int
254234106Sjimharrisndaopen(struct disk *dp)
255234106Sjimharris{
256234106Sjimharris	struct cam_periph *periph;
257230557Sjimharris	struct nda_softc *softc;
258230557Sjimharris	int error;
259230557Sjimharris
260230557Sjimharris	periph = (struct cam_periph *)dp->d_drv1;
261230557Sjimharris	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
262230557Sjimharris		return(ENXIO);
263230557Sjimharris	}
264230557Sjimharris
265230557Sjimharris	cam_periph_lock(periph);
266230557Sjimharris	if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) {
267230557Sjimharris		cam_periph_unlock(periph);
268230557Sjimharris		cam_periph_release(periph);
269230557Sjimharris		return (error);
270230557Sjimharris	}
271230557Sjimharris
272230557Sjimharris	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
273230557Sjimharris	    ("ndaopen\n"));
274230557Sjimharris
275230557Sjimharris	softc = (struct nda_softc *)periph->softc;
276230557Sjimharris	softc->flags |= NDA_FLAG_OPEN;
277230557Sjimharris
278230557Sjimharris	cam_periph_unhold(periph);
279230557Sjimharris	cam_periph_unlock(periph);
280230557Sjimharris	return (0);
281230557Sjimharris}
282230557Sjimharris
283230557Sjimharrisstatic int
284230557Sjimharrisndaclose(struct disk *dp)
285230557Sjimharris{
286230557Sjimharris	struct	cam_periph *periph;
287230557Sjimharris	struct	nda_softc *softc;
288230557Sjimharris	union ccb *ccb;
289230557Sjimharris	int error;
290230557Sjimharris
291230557Sjimharris	periph = (struct cam_periph *)dp->d_drv1;
292230557Sjimharris	softc = (struct nda_softc *)periph->softc;
293230557Sjimharris	cam_periph_lock(periph);
294230557Sjimharris
295230557Sjimharris	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
296230557Sjimharris	    ("ndaclose\n"));
297230557Sjimharris
298230557Sjimharris	if ((softc->flags & NDA_FLAG_DIRTY) != 0 &&
299230557Sjimharris	    (periph->flags & CAM_PERIPH_INVALID) == 0 &&
300230557Sjimharris	    cam_periph_hold(periph, PRIBIO) == 0) {
301230557Sjimharris
302230557Sjimharris		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
303230557Sjimharris		nda_nvme_flush(softc, &ccb->nvmeio);
304230557Sjimharris		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
305230557Sjimharris		    /*sense_flags*/0, softc->disk->d_devstat);
306230557Sjimharris
307230557Sjimharris		if (error != 0)
308230557Sjimharris			xpt_print(periph->path, "Synchronize cache failed\n");
309230557Sjimharris		else
310230557Sjimharris			softc->flags &= ~NDA_FLAG_DIRTY;
311230557Sjimharris		xpt_release_ccb(ccb);
312230557Sjimharris		cam_periph_unhold(periph);
313230557Sjimharris	}
314230557Sjimharris
315230557Sjimharris	softc->flags &= ~NDA_FLAG_OPEN;
316230557Sjimharris
317230557Sjimharris	while (softc->refcount != 0)
318230557Sjimharris		cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
319230557Sjimharris	cam_periph_unlock(periph);
320230557Sjimharris	cam_periph_release(periph);
321230557Sjimharris	return (0);
322230557Sjimharris}
323230557Sjimharris
324230557Sjimharrisstatic void
325230557Sjimharrisndaschedule(struct cam_periph *periph)
326230557Sjimharris{
327230557Sjimharris	struct nda_softc *softc = (struct nda_softc *)periph->softc;
328230557Sjimharris
329230557Sjimharris	if (softc->state != NDA_STATE_NORMAL)
330230557Sjimharris		return;
331230557Sjimharris
332230557Sjimharris	cam_iosched_schedule(softc->cam_iosched, periph);
333230557Sjimharris}
334230557Sjimharris
335230557Sjimharris/*
336230557Sjimharris * Actually translate the requested transfer into one the physical driver
337230557Sjimharris * can understand.  The transfer is described by a buf and will include
338230557Sjimharris * only one physical transfer.
339230557Sjimharris */
340230557Sjimharrisstatic void
341230557Sjimharrisndastrategy(struct bio *bp)
342230557Sjimharris{
343230557Sjimharris	struct cam_periph *periph;
344230557Sjimharris	struct nda_softc *softc;
345230557Sjimharris
346230557Sjimharris	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
347230557Sjimharris	softc = (struct nda_softc *)periph->softc;
348230557Sjimharris
349230557Sjimharris	cam_periph_lock(periph);
350230557Sjimharris
351230557Sjimharris	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp));
352230557Sjimharris
353230557Sjimharris	/*
354230557Sjimharris	 * If the device has been made invalid, error out
355230557Sjimharris	 */
356230557Sjimharris	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
357230557Sjimharris		cam_periph_unlock(periph);
358230557Sjimharris		biofinish(bp, NULL, ENXIO);
359230557Sjimharris		return;
360230557Sjimharris	}
361230557Sjimharris
362230557Sjimharris	/*
363230557Sjimharris	 * Place it in the queue of disk activities for this disk
364230557Sjimharris	 */
365230557Sjimharris	cam_iosched_queue_work(softc->cam_iosched, bp);
366230557Sjimharris
367230557Sjimharris	/*
368230557Sjimharris	 * Schedule ourselves for performing the work.
369230557Sjimharris	 */
370230557Sjimharris	ndaschedule(periph);
371230557Sjimharris	cam_periph_unlock(periph);
372230557Sjimharris
373230557Sjimharris	return;
374230557Sjimharris}
375230557Sjimharris
376230557Sjimharrisstatic int
377230557Sjimharrisndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length)
378230557Sjimharris{
379230557Sjimharris	struct	    cam_periph *periph;
380230557Sjimharris	struct	    nda_softc *softc;
381230557Sjimharris	u_int	    secsize;
382230557Sjimharris	union	    ccb ccb;
383230557Sjimharris	struct	    disk *dp;
384230557Sjimharris	uint64_t    lba;
385230557Sjimharris	uint32_t    count;
386230557Sjimharris	int	    error = 0;
387230557Sjimharris
388230557Sjimharris	dp = arg;
389230557Sjimharris	periph = dp->d_drv1;
390230557Sjimharris	softc = (struct nda_softc *)periph->softc;
391230557Sjimharris	cam_periph_lock(periph);
392230557Sjimharris	secsize = softc->disk->d_sectorsize;
393230557Sjimharris	lba = offset / secsize;
394230557Sjimharris	count = length / secsize;
395230557Sjimharris
396230557Sjimharris	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
397230557Sjimharris		cam_periph_unlock(periph);
398230557Sjimharris		return (ENXIO);
399230557Sjimharris	}
400230557Sjimharris
401230557Sjimharris	if (length > 0) {
402230557Sjimharris		xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
403230557Sjimharris		ccb.ccb_h.ccb_state = NDA_CCB_DUMP;
404230557Sjimharris		nda_nvme_write(softc, &ccb.nvmeio, virtual, lba, length, count);
405230557Sjimharris		xpt_polled_action(&ccb);
406230557Sjimharris
407230557Sjimharris		error = cam_periph_error(&ccb,
408230557Sjimharris		    0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
409230557Sjimharris		if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
410230557Sjimharris			cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
411230557Sjimharris			    /*reduction*/0, /*timeout*/0, /*getcount_only*/0);
412230557Sjimharris		if (error != 0)
413230557Sjimharris			printf("Aborting dump due to I/O error.\n");
414230557Sjimharris
415230557Sjimharris		cam_periph_unlock(periph);
416230557Sjimharris		return (error);
417230557Sjimharris	}
418230557Sjimharris
419230557Sjimharris	/* Flush */
420230557Sjimharris	xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
421230557Sjimharris
422230557Sjimharris	ccb.ccb_h.ccb_state = NDA_CCB_DUMP;
423230557Sjimharris	nda_nvme_flush(softc, &ccb.nvmeio);
424230557Sjimharris	xpt_polled_action(&ccb);
425230557Sjimharris
426230557Sjimharris	error = cam_periph_error(&ccb,
427230557Sjimharris	    0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
428230557Sjimharris	if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
429230557Sjimharris		cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
430230557Sjimharris		    /*reduction*/0, /*timeout*/0, /*getcount_only*/0);
431230557Sjimharris	if (error != 0)
432230557Sjimharris		xpt_print(periph->path, "flush cmd failed\n");
433230557Sjimharris	cam_periph_unlock(periph);
434230557Sjimharris	return (error);
435230557Sjimharris}
436230557Sjimharris
437230557Sjimharrisstatic void
438230557Sjimharrisndainit(void)
439230557Sjimharris{
440230557Sjimharris	cam_status status;
441230557Sjimharris
442230557Sjimharris	/*
443230557Sjimharris	 * Install a global async callback.  This callback will
444230557Sjimharris	 * receive async callbacks like "new device found".
445230557Sjimharris	 */
446230557Sjimharris	status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL);
447230557Sjimharris
448230557Sjimharris	if (status != CAM_REQ_CMP) {
449230557Sjimharris		printf("nda: Failed to attach master async callback "
450230557Sjimharris		       "due to status 0x%x!\n", status);
451230557Sjimharris	} else if (nda_send_ordered) {
452230557Sjimharris
453230557Sjimharris		/* Register our event handlers */
454230557Sjimharris		if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend,
455230557Sjimharris					   NULL, EVENTHANDLER_PRI_LAST)) == NULL)
456230557Sjimharris		    printf("ndainit: power event registration failed!\n");
457230557Sjimharris		if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown,
458230557Sjimharris					   NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
459230557Sjimharris		    printf("ndainit: shutdown event registration failed!\n");
460230557Sjimharris	}
461230557Sjimharris}
462230557Sjimharris
463230557Sjimharris/*
464230557Sjimharris * Callback from GEOM, called when it has finished cleaning up its
465230557Sjimharris * resources.
466230557Sjimharris */
467230557Sjimharrisstatic void
468230557Sjimharrisndadiskgonecb(struct disk *dp)
469230557Sjimharris{
470230557Sjimharris	struct cam_periph *periph;
471230557Sjimharris
472230557Sjimharris	periph = (struct cam_periph *)dp->d_drv1;
473230557Sjimharris
474230557Sjimharris	cam_periph_release(periph);
475230557Sjimharris}
476230557Sjimharris
477230557Sjimharrisstatic void
478230557Sjimharrisndaoninvalidate(struct cam_periph *periph)
479230557Sjimharris{
480230557Sjimharris	struct nda_softc *softc;
481230557Sjimharris
482230557Sjimharris	softc = (struct nda_softc *)periph->softc;
483230557Sjimharris
484230557Sjimharris	/*
485230557Sjimharris	 * De-register any async callbacks.
486230557Sjimharris	 */
487230557Sjimharris	xpt_register_async(0, ndaasync, periph, periph->path);
488230557Sjimharris#ifdef CAM_IO_STATS
489230557Sjimharris	softc->invalidations++;
490230557Sjimharris#endif
491230557Sjimharris
492230557Sjimharris	/*
493230557Sjimharris	 * Return all queued I/O with ENXIO.
494230557Sjimharris	 * XXX Handle any transactions queued to the card
495230557Sjimharris	 *     with XPT_ABORT_CCB.
496230557Sjimharris	 */
497230557Sjimharris	cam_iosched_flush(softc->cam_iosched, NULL, ENXIO);
498230557Sjimharris
499230557Sjimharris	disk_gone(softc->disk);
500230557Sjimharris}
501230557Sjimharris
502230557Sjimharrisstatic void
503230557Sjimharrisndacleanup(struct cam_periph *periph)
504230557Sjimharris{
505230557Sjimharris	struct nda_softc *softc;
506230557Sjimharris
507230557Sjimharris	softc = (struct nda_softc *)periph->softc;
508230557Sjimharris
509230557Sjimharris	cam_periph_unlock(periph);
510230557Sjimharris
511230557Sjimharris	cam_iosched_fini(softc->cam_iosched);
512230557Sjimharris
513230557Sjimharris	/*
514230557Sjimharris	 * If we can't free the sysctl tree, oh well...
515230557Sjimharris	 */
516230557Sjimharris	if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) {
517230557Sjimharris#ifdef CAM_IO_STATS
518230557Sjimharris		if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0)
519230557Sjimharris			xpt_print(periph->path,
520230557Sjimharris			    "can't remove sysctl stats context\n");
521230557Sjimharris#endif
522230557Sjimharris		if (sysctl_ctx_free(&softc->sysctl_ctx) != 0)
523230557Sjimharris			xpt_print(periph->path,
524230557Sjimharris			    "can't remove sysctl context\n");
525230557Sjimharris	}
526230557Sjimharris
527230557Sjimharris	disk_destroy(softc->disk);
528230557Sjimharris	free(softc, M_DEVBUF);
529230557Sjimharris	cam_periph_lock(periph);
530230557Sjimharris}
531230557Sjimharris
532230557Sjimharrisstatic void
533230557Sjimharrisndaasync(void *callback_arg, u_int32_t code,
534230557Sjimharris	struct cam_path *path, void *arg)
535230557Sjimharris{
536230557Sjimharris	struct cam_periph *periph;
537230557Sjimharris
538230557Sjimharris	periph = (struct cam_periph *)callback_arg;
539230557Sjimharris	switch (code) {
540230557Sjimharris	case AC_FOUND_DEVICE:
541230557Sjimharris	{
542230557Sjimharris		struct ccb_getdev *cgd;
543230557Sjimharris		cam_status status;
544230557Sjimharris
545230557Sjimharris		cgd = (struct ccb_getdev *)arg;
546230557Sjimharris		if (cgd == NULL)
547230557Sjimharris			break;
548230557Sjimharris
549230557Sjimharris		if (cgd->protocol != PROTO_NVME)
550230557Sjimharris			break;
551230557Sjimharris
552230557Sjimharris		/*
553230557Sjimharris		 * Allocate a peripheral instance for
554230557Sjimharris		 * this device and start the probe
555230557Sjimharris		 * process.
556230557Sjimharris		 */
557230557Sjimharris		status = cam_periph_alloc(ndaregister, ndaoninvalidate,
558230557Sjimharris					  ndacleanup, ndastart,
559230557Sjimharris					  "nda", CAM_PERIPH_BIO,
560230557Sjimharris					  path, ndaasync,
561230557Sjimharris					  AC_FOUND_DEVICE, cgd);
562230557Sjimharris
563230557Sjimharris		if (status != CAM_REQ_CMP
564230557Sjimharris		 && status != CAM_REQ_INPROG)
565230557Sjimharris			printf("ndaasync: Unable to attach to new device "
566230557Sjimharris				"due to status 0x%x\n", status);
567230557Sjimharris		break;
568230557Sjimharris	}
569230557Sjimharris	case AC_ADVINFO_CHANGED:
570230557Sjimharris	{
571230557Sjimharris		uintptr_t buftype;
572230557Sjimharris
573230557Sjimharris		buftype = (uintptr_t)arg;
574230557Sjimharris		if (buftype == CDAI_TYPE_PHYS_PATH) {
575230557Sjimharris			struct nda_softc *softc;
576230557Sjimharris
577230557Sjimharris			softc = periph->softc;
578230557Sjimharris			disk_attr_changed(softc->disk, "GEOM::physpath",
579230557Sjimharris					  M_NOWAIT);
580230557Sjimharris		}
581230557Sjimharris		break;
582230557Sjimharris	}
583230557Sjimharris	case AC_LOST_DEVICE:
584230557Sjimharris	default:
585230557Sjimharris		cam_periph_async(periph, code, path, arg);
586230557Sjimharris		break;
587230557Sjimharris	}
588230557Sjimharris}
589230557Sjimharris
590230557Sjimharrisstatic void
591230557Sjimharrisndasysctlinit(void *context, int pending)
592230557Sjimharris{
593230557Sjimharris	struct cam_periph *periph;
594230557Sjimharris	struct nda_softc *softc;
595230557Sjimharris	char tmpstr[80], tmpstr2[80];
596230557Sjimharris
597230557Sjimharris	periph = (struct cam_periph *)context;
598230557Sjimharris
599230557Sjimharris	/* periph was held for us when this task was enqueued */
600230557Sjimharris	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
601230557Sjimharris		cam_periph_release(periph);
602230557Sjimharris		return;
603230557Sjimharris	}
604230557Sjimharris
605230557Sjimharris	softc = (struct nda_softc *)periph->softc;
606230557Sjimharris	snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number);
607230557Sjimharris	snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number);
608230557Sjimharris
609230557Sjimharris	sysctl_ctx_init(&softc->sysctl_ctx);
610230557Sjimharris	softc->flags |= NDA_FLAG_SCTX_INIT;
611230557Sjimharris	softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
612230557Sjimharris		SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2,
613230557Sjimharris		CTLFLAG_RD, 0, tmpstr);
614230557Sjimharris	if (softc->sysctl_tree == NULL) {
615230557Sjimharris		printf("ndasysctlinit: unable to allocate sysctl tree\n");
616230557Sjimharris		cam_periph_release(periph);
617230557Sjimharris		return;
618230557Sjimharris	}
619230557Sjimharris
620230557Sjimharris	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
621230557Sjimharris		OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE,
622230557Sjimharris		&softc->unmappedio, 0, "Unmapped I/O leaf");
623230557Sjimharris
624230557Sjimharris	SYSCTL_ADD_INT(&softc->sysctl_ctx,
625230557Sjimharris		       SYSCTL_CHILDREN(softc->sysctl_tree),
626230557Sjimharris		       OID_AUTO,
627230557Sjimharris		       "rotating",
628230557Sjimharris		       CTLFLAG_RD | CTLFLAG_MPSAFE,
629230557Sjimharris		       &nda_rotating_media,
630230557Sjimharris		       0,
631230557Sjimharris		       "Rotating media");
632230557Sjimharris
633230557Sjimharris#ifdef CAM_IO_STATS
634230557Sjimharris	softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx,
635230557Sjimharris		SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats",
636230557Sjimharris		CTLFLAG_RD, 0, "Statistics");
637230557Sjimharris	if (softc->sysctl_stats_tree == NULL) {
638230557Sjimharris		printf("ndasysctlinit: unable to allocate sysctl tree for stats\n");
639230557Sjimharris		cam_periph_release(periph);
640230557Sjimharris		return;
641231296Sjimharris	}
642230557Sjimharris	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
643230557Sjimharris		SYSCTL_CHILDREN(softc->sysctl_stats_tree),
644230557Sjimharris		OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE,
645230557Sjimharris		&softc->timeouts, 0,
646230557Sjimharris		"Device timeouts reported by the SIM");
647230557Sjimharris	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
648231296Sjimharris		SYSCTL_CHILDREN(softc->sysctl_stats_tree),
649230557Sjimharris		OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE,
650231296Sjimharris		&softc->errors, 0,
651230557Sjimharris		"Transport errors reported by the SIM.");
652230557Sjimharris	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
653230557Sjimharris		SYSCTL_CHILDREN(softc->sysctl_stats_tree),
654230557Sjimharris		OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE,
655230557Sjimharris		&softc->invalidations, 0,
656230557Sjimharris		"Device pack invalidations.");
657230557Sjimharris#endif
658230557Sjimharris
659230557Sjimharris	cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx,
660230557Sjimharris	    softc->sysctl_tree);
661230557Sjimharris
662230557Sjimharris	cam_periph_release(periph);
663230557Sjimharris}
664230557Sjimharris
665231296Sjimharrisstatic int
666230557Sjimharrisndagetattr(struct bio *bp)
667230557Sjimharris{
668230557Sjimharris	int ret;
669230557Sjimharris	struct cam_periph *periph;
670230557Sjimharris
671230557Sjimharris	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
672230557Sjimharris	cam_periph_lock(periph);
673230557Sjimharris	ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute,
674230557Sjimharris	    periph->path);
675230557Sjimharris	cam_periph_unlock(periph);
676230557Sjimharris	if (ret == 0)
677230557Sjimharris		bp->bio_completed = bp->bio_length;
678230557Sjimharris	return ret;
679230557Sjimharris}
680230557Sjimharris
681230557Sjimharrisstatic cam_status
682230557Sjimharrisndaregister(struct cam_periph *periph, void *arg)
683230557Sjimharris{
684230557Sjimharris	struct nda_softc *softc;
685230557Sjimharris	struct disk *disk;
686230557Sjimharris	struct ccb_pathinq cpi;
687230557Sjimharris	struct ccb_getdev *cgd;
688230557Sjimharris	const struct nvme_namespace_data *nsd;
689230557Sjimharris	const struct nvme_controller_data *cd;
690230557Sjimharris	char   announce_buf[80];
691230557Sjimharris//	caddr_t match;
692230557Sjimharris	u_int maxio;
693230557Sjimharris	int quirks;
694230557Sjimharris
695230557Sjimharris	cgd = (struct ccb_getdev *)arg;
696230557Sjimharris	if (cgd == NULL) {
697230557Sjimharris		printf("ndaregister: no getdev CCB, can't register device\n");
698230557Sjimharris		return(CAM_REQ_CMP_ERR);
699230557Sjimharris	}
700230557Sjimharris	nsd = cgd->nvme_data;
701230557Sjimharris	cd = cgd->nvme_cdata;
702230557Sjimharris
703230557Sjimharris	softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF,
704230557Sjimharris	    M_NOWAIT | M_ZERO);
705230557Sjimharris
706230557Sjimharris	if (softc == NULL) {
707230557Sjimharris		printf("ndaregister: Unable to probe new device. "
708230557Sjimharris		    "Unable to allocate softc\n");
709230557Sjimharris		return(CAM_REQ_CMP_ERR);
710230557Sjimharris	}
711230557Sjimharris
712230557Sjimharris	if (cam_iosched_init(&softc->cam_iosched, periph) != 0) {
713230557Sjimharris		printf("ndaregister: Unable to probe new device. "
714230557Sjimharris		       "Unable to allocate iosched memory\n");
715230557Sjimharris		return(CAM_REQ_CMP_ERR);
716230557Sjimharris	}
717230557Sjimharris
718230557Sjimharris	/* ident_data parsing */
719230557Sjimharris
720230557Sjimharris	periph->softc = softc;
721230557Sjimharris
722230557Sjimharris#if 0
723230557Sjimharris	/*
724230557Sjimharris	 * See if this device has any quirks.
725230557Sjimharris	 */
726230557Sjimharris	match = cam_quirkmatch((caddr_t)&cgd->ident_data,
727230557Sjimharris			       (caddr_t)nda_quirk_table,
728230557Sjimharris			       sizeof(nda_quirk_table)/sizeof(*nda_quirk_table),
729230557Sjimharris			       sizeof(*nda_quirk_table), ata_identify_match);
730230557Sjimharris	if (match != NULL)
731230557Sjimharris		softc->quirks = ((struct nda_quirk_entry *)match)->quirks;
732230557Sjimharris	else
733230557Sjimharris#endif
734230557Sjimharris		softc->quirks = NDA_Q_NONE;
735230557Sjimharris
736230557Sjimharris	bzero(&cpi, sizeof(cpi));
737230557Sjimharris	xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE);
738230557Sjimharris	cpi.ccb_h.func_code = XPT_PATH_INQ;
739230557Sjimharris	xpt_action((union ccb *)&cpi);
740230557Sjimharris
741230557Sjimharris	TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph);
742230557Sjimharris
743230557Sjimharris	/*
744230557Sjimharris	 * The name space ID is the lun, save it for later I/O
745230557Sjimharris	 */
746230557Sjimharris	softc->nsid = (uint16_t)xpt_path_lun_id(periph->path);
747230557Sjimharris
748230557Sjimharris	/*
749230557Sjimharris	 * Register this media as a disk
750230557Sjimharris	 */
751230557Sjimharris	(void)cam_periph_hold(periph, PRIBIO);
752230557Sjimharris	cam_periph_unlock(periph);
753230557Sjimharris	snprintf(announce_buf, sizeof(announce_buf),
754230557Sjimharris	    "kern.cam.nda.%d.quirks", periph->unit_number);
755230557Sjimharris	quirks = softc->quirks;
756230557Sjimharris	TUNABLE_INT_FETCH(announce_buf, &quirks);
757230557Sjimharris	softc->quirks = quirks;
758230557Sjimharris	cam_iosched_set_sort_queue(softc->cam_iosched, 0);
759230557Sjimharris	softc->disk = disk = disk_alloc();
760230557Sjimharris	strlcpy(softc->disk->d_descr, cd->mn,
761230557Sjimharris	    MIN(sizeof(softc->disk->d_descr), sizeof(cd->mn)));
762230557Sjimharris	strlcpy(softc->disk->d_ident, cd->sn,
763230557Sjimharris	    MIN(sizeof(softc->disk->d_ident), sizeof(cd->sn)));
764230557Sjimharris	disk->d_rotation_rate = 0;	/* Spinning rust need not apply */
765230557Sjimharris	disk->d_open = ndaopen;
766230557Sjimharris	disk->d_close = ndaclose;
767230557Sjimharris	disk->d_strategy = ndastrategy;
768230557Sjimharris	disk->d_getattr = ndagetattr;
769230557Sjimharris	disk->d_dump = ndadump;
770230557Sjimharris	disk->d_gone = ndadiskgonecb;
771230557Sjimharris	disk->d_name = "nda";
772230557Sjimharris	disk->d_drv1 = periph;
773230557Sjimharris	disk->d_unit = periph->unit_number;
774230557Sjimharris	maxio = cpi.maxio;		/* Honor max I/O size of SIM */
775230557Sjimharris	if (maxio == 0)
776230557Sjimharris		maxio = DFLTPHYS;	/* traditional default */
777230557Sjimharris	else if (maxio > MAXPHYS)
778230557Sjimharris		maxio = MAXPHYS;	/* for safety */
779230557Sjimharris	disk->d_maxsize = maxio;
780230557Sjimharris	disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads;
781230557Sjimharris	disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
782230557Sjimharris	disk->d_delmaxsize = disk->d_mediasize;
783230557Sjimharris	disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
784230557Sjimharris//	if (cd->oncs.dsm) // XXX broken?
785230557Sjimharris		disk->d_flags |= DISKFLAG_CANDELETE;
786230557Sjimharris	if (cd->vwc.present)
787230557Sjimharris		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
788230557Sjimharris	if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
789230557Sjimharris		disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
790230557Sjimharris		softc->unmappedio = 1;
791230557Sjimharris	}
792230557Sjimharris	/*
793230557Sjimharris	 * d_ident and d_descr are both far bigger than the length of either
794230557Sjimharris	 *  the serial or model number strings.
795230557Sjimharris	 */
796230557Sjimharris	nvme_strvis(disk->d_descr, cd->mn,
797230557Sjimharris	    sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH);
798230557Sjimharris	nvme_strvis(disk->d_ident, cd->sn,
799230557Sjimharris	    sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH);
800230557Sjimharris	disk->d_hba_vendor = cpi.hba_vendor;
801230557Sjimharris	disk->d_hba_device = cpi.hba_device;
802230557Sjimharris	disk->d_hba_subvendor = cpi.hba_subvendor;
803230557Sjimharris	disk->d_hba_subdevice = cpi.hba_subdevice;
804230557Sjimharris	disk->d_stripesize = disk->d_sectorsize;
805230557Sjimharris	disk->d_stripeoffset = 0;
806230557Sjimharris	disk->d_devstat = devstat_new_entry(periph->periph_name,
807230557Sjimharris	    periph->unit_number, disk->d_sectorsize,
808230557Sjimharris	    DEVSTAT_ALL_SUPPORTED,
809230557Sjimharris	    DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport),
810230557Sjimharris	    DEVSTAT_PRIORITY_DISK);
811230557Sjimharris
812230557Sjimharris	/*
813230557Sjimharris	 * Acquire a reference to the periph before we register with GEOM.
814230557Sjimharris	 * We'll release this reference once GEOM calls us back (via
815230557Sjimharris	 * ndadiskgonecb()) telling us that our provider has been freed.
816230557Sjimharris	 */
817230557Sjimharris	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
818230557Sjimharris		xpt_print(periph->path, "%s: lost periph during "
819230557Sjimharris			  "registration!\n", __func__);
820230557Sjimharris		cam_periph_lock(periph);
821230557Sjimharris		return (CAM_REQ_CMP_ERR);
822230557Sjimharris	}
823230557Sjimharris	disk_create(softc->disk, DISK_VERSION);
824230557Sjimharris	cam_periph_lock(periph);
825230557Sjimharris	cam_periph_unhold(periph);
826230557Sjimharris
827230557Sjimharris	snprintf(announce_buf, sizeof(announce_buf),
828230557Sjimharris		"%juMB (%ju %u byte sectors)",
829230557Sjimharris	    (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)),
830230557Sjimharris		(uintmax_t)disk->d_mediasize / disk->d_sectorsize,
831230557Sjimharris		disk->d_sectorsize);
832230557Sjimharris	xpt_announce_periph(periph, announce_buf);
833230557Sjimharris	xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING);
834230557Sjimharris
835230557Sjimharris	/*
836230557Sjimharris	 * Create our sysctl variables, now that we know
837230557Sjimharris	 * we have successfully attached.
838230557Sjimharris	 */
839230557Sjimharris	if (cam_periph_acquire(periph) == CAM_REQ_CMP)
840230557Sjimharris		taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task);
841230557Sjimharris
842230557Sjimharris	/*
843230557Sjimharris	 * Register for device going away and info about the drive
844230557Sjimharris	 * changing (though with NVMe, it can't)
845230557Sjimharris	 */
846230557Sjimharris	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
847230557Sjimharris	    ndaasync, periph, periph->path);
848230557Sjimharris
849230557Sjimharris	softc->state = NDA_STATE_NORMAL;
850230557Sjimharris	return(CAM_REQ_CMP);
851230557Sjimharris}
852230557Sjimharris
853230557Sjimharrisstatic void
854230557Sjimharrisndastart(struct cam_periph *periph, union ccb *start_ccb)
855230557Sjimharris{
856230557Sjimharris	struct nda_softc *softc = (struct nda_softc *)periph->softc;
857230557Sjimharris	struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio;
858230557Sjimharris
859230557Sjimharris	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n"));
860230557Sjimharris
861230557Sjimharris	switch (softc->state) {
862230557Sjimharris	case NDA_STATE_NORMAL:
863230557Sjimharris	{
864230557Sjimharris		struct bio *bp;
865230557Sjimharris
866230557Sjimharris		bp = cam_iosched_next_bio(softc->cam_iosched);
867230557Sjimharris		CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp));
868230557Sjimharris		if (bp == NULL) {
869230557Sjimharris			xpt_release_ccb(start_ccb);
870230557Sjimharris			break;
871230557Sjimharris		}
872230557Sjimharris
873230557Sjimharris		switch (bp->bio_cmd) {
874230557Sjimharris		case BIO_WRITE:
875230557Sjimharris			softc->flags |= NDA_FLAG_DIRTY;
876230557Sjimharris			/* FALLTHROUGH */
877230557Sjimharris		case BIO_READ:
878230557Sjimharris		{
879230557Sjimharris#ifdef NDA_TEST_FAILURE
880230557Sjimharris			int fail = 0;
881230557Sjimharris
882230557Sjimharris			/*
883230557Sjimharris			 * Support the failure ioctls.  If the command is a
884230557Sjimharris			 * read, and there are pending forced read errors, or
885230557Sjimharris			 * if a write and pending write errors, then fail this
886230557Sjimharris			 * operation with EIO.  This is useful for testing
887230557Sjimharris			 * purposes.  Also, support having every Nth read fail.
888230557Sjimharris			 *
889230557Sjimharris			 * This is a rather blunt tool.
890230557Sjimharris			 */
891230557Sjimharris			if (bp->bio_cmd == BIO_READ) {
892230557Sjimharris				if (softc->force_read_error) {
893230557Sjimharris					softc->force_read_error--;
894230557Sjimharris					fail = 1;
895230557Sjimharris				}
896230557Sjimharris				if (softc->periodic_read_error > 0) {
897230557Sjimharris					if (++softc->periodic_read_count >=
898230557Sjimharris					    softc->periodic_read_error) {
899230557Sjimharris						softc->periodic_read_count = 0;
900230557Sjimharris						fail = 1;
901230557Sjimharris					}
902230557Sjimharris				}
903230557Sjimharris			} else {
904230557Sjimharris				if (softc->force_write_error) {
905230557Sjimharris					softc->force_write_error--;
906230557Sjimharris					fail = 1;
907230557Sjimharris				}
908230557Sjimharris			}
909230557Sjimharris			if (fail) {
910230557Sjimharris				biofinish(bp, NULL, EIO);
911230557Sjimharris				xpt_release_ccb(start_ccb);
912230557Sjimharris				ndaschedule(periph);
913230557Sjimharris				return;
914230557Sjimharris			}
915231296Sjimharris#endif
916230557Sjimharris			KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
917230557Sjimharris			    round_page(bp->bio_bcount + bp->bio_ma_offset) /
918230557Sjimharris			    PAGE_SIZE == bp->bio_ma_n,
919230557Sjimharris			    ("Short bio %p", bp));
920230557Sjimharris			nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ?
921230557Sjimharris			    NVME_OPC_READ : NVME_OPC_WRITE);
922230557Sjimharris			break;
923230557Sjimharris		}
924230557Sjimharris		case BIO_DELETE:
925230557Sjimharris		{
926230557Sjimharris			struct nvme_dsm_range *dsm_range;
927231296Sjimharris
928230557Sjimharris			dsm_range =
929230557Sjimharris			    malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_WAITOK);
930230557Sjimharris			dsm_range->length =
931230557Sjimharris			    bp->bio_bcount / softc->disk->d_sectorsize;
932230557Sjimharris			dsm_range->starting_lba =
933230557Sjimharris			    bp->bio_offset / softc->disk->d_sectorsize;
934230557Sjimharris			bp->bio_driver2 = dsm_range;
935230557Sjimharris			nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1);
936			start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM;
937			start_ccb->ccb_h.flags |= CAM_UNLOCKED;
938			cam_iosched_submit_trim(softc->cam_iosched);	/* XXX */
939			goto out;
940		}
941		case BIO_FLUSH:
942			nda_nvme_flush(softc, nvmeio);
943			break;
944		}
945		start_ccb->ccb_h.ccb_state = NDA_CCB_BUFFER_IO;
946		start_ccb->ccb_h.flags |= CAM_UNLOCKED;
947out:
948		start_ccb->ccb_h.ccb_bp = bp;
949		softc->outstanding_cmds++;
950		softc->refcount++;
951		cam_periph_unlock(periph);
952		xpt_action(start_ccb);
953		cam_periph_lock(periph);
954		softc->refcount--;
955
956		/* May have more work to do, so ensure we stay scheduled */
957		ndaschedule(periph);
958		break;
959		}
960	}
961}
962
963static void
964ndadone(struct cam_periph *periph, union ccb *done_ccb)
965{
966	struct nda_softc *softc;
967	struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio;
968	struct cam_path *path;
969	int state;
970
971	softc = (struct nda_softc *)periph->softc;
972	path = done_ccb->ccb_h.path;
973
974	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n"));
975
976	state = nvmeio->ccb_h.ccb_state & NDA_CCB_TYPE_MASK;
977	switch (state) {
978	case NDA_CCB_BUFFER_IO:
979	case NDA_CCB_TRIM:
980	{
981		struct bio *bp;
982		int error;
983
984		cam_periph_lock(periph);
985		bp = (struct bio *)done_ccb->ccb_h.ccb_bp;
986		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
987			error = ndaerror(done_ccb, 0, 0);
988			if (error == ERESTART) {
989				/* A retry was scheduled, so just return. */
990				cam_periph_unlock(periph);
991				return;
992			}
993			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
994				cam_release_devq(path,
995						 /*relsim_flags*/0,
996						 /*reduction*/0,
997						 /*timeout*/0,
998						 /*getcount_only*/0);
999		} else {
1000			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
1001				panic("REQ_CMP with QFRZN");
1002			error = 0;
1003		}
1004		bp->bio_error = error;
1005		if (error != 0) {
1006			bp->bio_resid = bp->bio_bcount;
1007			bp->bio_flags |= BIO_ERROR;
1008		} else {
1009			if (state == NDA_CCB_TRIM)
1010				bp->bio_resid = 0;
1011			else
1012				bp->bio_resid = nvmeio->resid;
1013			if (bp->bio_resid > 0)
1014				bp->bio_flags |= BIO_ERROR;
1015		}
1016		if (state == NDA_CCB_TRIM)
1017			free(bp->bio_driver2, M_NVMEDA);
1018		softc->outstanding_cmds--;
1019
1020		cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb);
1021		xpt_release_ccb(done_ccb);
1022		if (state == NDA_CCB_TRIM) {
1023#ifdef notyet
1024			TAILQ_HEAD(, bio) queue;
1025			struct bio *bp1;
1026
1027			TAILQ_INIT(&queue);
1028			TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue);
1029#endif
1030			cam_iosched_trim_done(softc->cam_iosched);
1031			ndaschedule(periph);
1032			cam_periph_unlock(periph);
1033#ifdef notyet
1034/* Not yet collapsing several BIO_DELETE requests into one TRIM */
1035			while ((bp1 = TAILQ_FIRST(&queue)) != NULL) {
1036				TAILQ_REMOVE(&queue, bp1, bio_queue);
1037				bp1->bio_error = error;
1038				if (error != 0) {
1039					bp1->bio_flags |= BIO_ERROR;
1040					bp1->bio_resid = bp1->bio_bcount;
1041				} else
1042					bp1->bio_resid = 0;
1043				biodone(bp1);
1044			}
1045#else
1046			biodone(bp);
1047#endif
1048		} else {
1049			ndaschedule(periph);
1050			cam_periph_unlock(periph);
1051			biodone(bp);
1052		}
1053		return;
1054	}
1055	case NDA_CCB_DUMP:
1056		/* No-op.  We're polling */
1057		return;
1058	default:
1059		break;
1060	}
1061	xpt_release_ccb(done_ccb);
1062}
1063
1064static int
1065ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags)
1066{
1067	struct nda_softc *softc;
1068	struct cam_periph *periph;
1069
1070	periph = xpt_path_periph(ccb->ccb_h.path);
1071	softc = (struct nda_softc *)periph->softc;
1072
1073	switch (ccb->ccb_h.status & CAM_STATUS_MASK) {
1074	case CAM_CMD_TIMEOUT:
1075#ifdef CAM_IO_STATS
1076		softc->timeouts++;
1077#endif
1078		break;
1079	case CAM_REQ_ABORTED:
1080	case CAM_REQ_CMP_ERR:
1081	case CAM_REQ_TERMIO:
1082	case CAM_UNREC_HBA_ERROR:
1083	case CAM_DATA_RUN_ERR:
1084	case CAM_ATA_STATUS_ERROR:
1085#ifdef CAM_IO_STATS
1086		softc->errors++;
1087#endif
1088		break;
1089	default:
1090		break;
1091	}
1092
1093	return(cam_periph_error(ccb, cam_flags, sense_flags, NULL));
1094}
1095
1096/*
1097 * Step through all NDA peripheral drivers, and if the device is still open,
1098 * sync the disk cache to physical media.
1099 */
1100static void
1101ndaflush(void)
1102{
1103	struct cam_periph *periph;
1104	struct nda_softc *softc;
1105	union ccb *ccb;
1106	int error;
1107
1108	CAM_PERIPH_FOREACH(periph, &ndadriver) {
1109		softc = (struct nda_softc *)periph->softc;
1110		if (SCHEDULER_STOPPED()) {
1111			/* If we paniced with the lock held, do not recurse. */
1112			if (!cam_periph_owned(periph) &&
1113			    (softc->flags & NDA_FLAG_OPEN)) {
1114				ndadump(softc->disk, NULL, 0, 0, 0);
1115			}
1116			continue;
1117		}
1118		cam_periph_lock(periph);
1119		/*
1120		 * We only sync the cache if the drive is still open, and
1121		 * if the drive is capable of it..
1122		 */
1123		if ((softc->flags & NDA_FLAG_OPEN) == 0) {
1124			cam_periph_unlock(periph);
1125			continue;
1126		}
1127
1128		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
1129		nda_nvme_flush(softc, &ccb->nvmeio);
1130		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
1131		    /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY,
1132		    softc->disk->d_devstat);
1133		if (error != 0)
1134			xpt_print(periph->path, "Synchronize cache failed\n");
1135		xpt_release_ccb(ccb);
1136		cam_periph_unlock(periph);
1137	}
1138}
1139
/*
 * Flush all open nda devices via ndaflush().  Both arguments are ignored.
 */
static void
ndashutdown(void *arg, int howto)
{
	ndaflush();
}
1146
/*
 * Flush all open nda devices via ndaflush().  The argument is ignored.
 */
static void
ndasuspend(void *arg)
{
	ndaflush();
}
1153