nvme.c revision 241657
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme.c 241657 2012-10-18 00:32:07Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "nvme_private.h"

struct nvme_consumer {
	nvme_consumer_cb_fn_t		cb_fn;
	void				*cb_arg;
};

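/*
 * Registered consumer callbacks (e.g. nvd(4)).  A NULL cb_fn marks a
 *  free slot.
 */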
struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];

MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");

static int    nvme_probe(device_t);
static int    nvme_attach(device_t);
static int    nvme_detach(device_t);

static devclass_t nvme_devclass;

static device_method_t nvme_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     nvme_probe),
	DEVMETHOD(device_attach,    nvme_attach),
	DEVMETHOD(device_detach,    nvme_detach),
	{ 0, 0 }
};

static driver_t nvme_pci_driver = {
	"nvme",
	nvme_pci_methods,
	sizeof(struct nvme_controller),
};

DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, 0, 0);
MODULE_VERSION(nvme, 1);

static struct _pcsid
{
	u_int32_t   type;
	const char  *desc;
} pci_ids[] = {
	{ 0x01118086,		"NVMe Controller"  },
	{ CHATHAM_PCI_ID,	"Chatham Prototype NVMe Controller"  },
	{ IDT_PCI_ID,		"IDT NVMe Controller"  },
	{ 0x00000000,		NULL  }
};

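/*
 * Match the device against the PCI ID table above.  If there is no exact
 *  match, fall back to a generic match on the NVMe storage class/subclass
 *  when the PCI headers define PCIS_STORAGE_NVM.
 */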
static int
nvme_probe (device_t device)
{
	struct _pcsid	*ep;
	u_int32_t	type;

	type = pci_get_devid(device);
	ep = pci_ids;

	while (ep->type && ep->type != type)
		++ep;

	if (ep->desc) {
		device_set_desc(device, ep->desc);
		return (BUS_PROBE_DEFAULT);
	}

#if defined(PCIS_STORAGE_NVM)
	if (pci_get_class(device)    == PCIC_STORAGE &&
	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
		device_set_desc(device, "Generic NVMe Device");
		return (BUS_PROBE_GENERIC);
	}
#endif

	return (ENXIO);
}

static void
nvme_load(void)
{
}

static void
nvme_unload(void)
{
}

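/*
 * Notify each attached controller of an impending normal shutdown
 *  (CC.SHN), then poll CSTS.SHST until the controller reports that
 *  shutdown processing is complete.
 */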
static void
nvme_shutdown(void)
{
	device_t		*devlist;
	struct nvme_controller	*ctrlr;
	union cc_register	cc;
	union csts_register	csts;
	int			dev, devcount;

	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
		return;

	for (dev = 0; dev < devcount; dev++) {
		/*
		 * Only notify the controller of shutdown when a real shutdown
		 *  is in progress, not when a module unload occurs.  At least
		 *  some controllers (Chatham, for example) will not allow the
		 *  controller to be re-enabled once a shutdown notification
		 *  has been received.
		 */
		ctrlr = DEVICE2SOFTC(devlist[dev]);
		cc.raw = nvme_mmio_read_4(ctrlr, cc);
		cc.bits.shn = NVME_SHN_NORMAL;
		nvme_mmio_write_4(ctrlr, cc, cc.raw);
		csts.raw = nvme_mmio_read_4(ctrlr, csts);
		while (csts.bits.shst != NVME_SHST_COMPLETE) {
			DELAY(5);
			csts.raw = nvme_mmio_read_4(ctrlr, csts);
		}
	}

	free(devlist, M_TEMP);
}

static int
nvme_modevent(module_t mod, int type, void *arg)
{

	switch (type) {
	case MOD_LOAD:
		nvme_load();
		break;
	case MOD_UNLOAD:
		nvme_unload();
		break;
	case MOD_SHUTDOWN:
		nvme_shutdown();
		break;
	default:
		break;
	}

	return (0);
}

moduledata_t nvme_mod = {
	"nvme",
	(modeventhand_t)nvme_modevent,
	0
};

DECLARE_MODULE(nvme, nvme_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);

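/* Debug helper: dump each field of a submission queue entry. */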
void
nvme_dump_command(struct nvme_command *cmd)
{
	printf("opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x "
	    "mptr:%qx prp1:%qx prp2:%qx cdw:%x %x %x %x %x %x\n",
	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
	    cmd->rsvd2, cmd->rsvd3,
	    (long long unsigned int)cmd->mptr,
	    (long long unsigned int)cmd->prp1,
	    (long long unsigned int)cmd->prp2,
	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
	    cmd->cdw15);
}

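/* Debug helper: dump each field of a completion queue entry. */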
void
nvme_dump_completion(struct nvme_completion *cpl)
{
	printf("cdw0:%08x sqhd:%04x sqid:%04x "
	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
	    cpl->cdw0, cpl->sqhd, cpl->sqid,
	    cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
	    cpl->sf_dnr);
}

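/*
 * bus_dma(9) callback: fill in the command's PRP entries from the DMA
 *  segment list, then submit the command on the tracker's queue pair.
 */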
void
nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
{
	struct nvme_tracker	*tr;
	struct nvme_qpair	*qpair;
	struct nvme_prp_list	*prp_list;
	uint32_t		cur_nseg;

	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));

	tr = (struct nvme_tracker *)arg;
	qpair = tr->qpair;

	/*
	 * Note that we specified PAGE_SIZE for alignment and max
	 *  segment size when creating the bus dma tags.  So here
	 *  we can safely just transfer each segment to its
	 *  associated PRP entry.
	 */
	tr->cmd.prp1 = seg[0].ds_addr;

	if (nseg == 2) {
		tr->cmd.prp2 = seg[1].ds_addr;
	} else if (nseg > 2) {
		KASSERT(tr->prp_list,
		    ("prp_list needed but not attached to tracker\n"));
		cur_nseg = 1;
		prp_list = tr->prp_list;
		tr->cmd.prp2 = (uint64_t)prp_list->bus_addr;
		while (cur_nseg < nseg) {
			prp_list->prp[cur_nseg-1] =
			    (uint64_t)seg[cur_nseg].ds_addr;
			cur_nseg++;
		}
	}

	nvme_qpair_submit_cmd(qpair, tr);
}

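/*
 * Select a queue pair (admin queue, or a per-CPU/shared I/O queue),
 *  compute how many PRP entries the payload requires, and allocate a
 *  tracker, attaching a PRP list when more than two entries are needed.
 */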
struct nvme_tracker *
nvme_allocate_tracker(struct nvme_controller *ctrlr, boolean_t is_admin,
    nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t payload_size, void *payload)
{
	struct nvme_tracker	*tr;
	struct nvme_qpair	*qpair;
	uint32_t		modulo, offset, num_prps;
	boolean_t		alloc_prp_list = FALSE;

	if (is_admin) {
		qpair = &ctrlr->adminq;
	} else {
		if (ctrlr->per_cpu_io_queues)
			qpair = &ctrlr->ioq[curcpu];
		else
			qpair = &ctrlr->ioq[0];
	}

	num_prps = payload_size / PAGE_SIZE;
	modulo = payload_size % PAGE_SIZE;
	offset = (uint32_t)((uintptr_t)payload % PAGE_SIZE);

	if (modulo || offset)
		num_prps += 1 + (modulo + offset - 1) / PAGE_SIZE;

	if (num_prps > 2)
		alloc_prp_list = TRUE;

	tr = nvme_qpair_allocate_tracker(qpair, alloc_prp_list);

	if (tr == NULL)
		return (NULL);

	memset(&tr->cmd, 0, sizeof(tr->cmd));

	tr->qpair = qpair;
	tr->cb_fn = cb_fn;
	tr->cb_arg = cb_arg;
	tr->payload_size = payload_size;

	return (tr);
}

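/*
 * Construct the controller softc, reset the hardware, and defer the
 *  remainder of initialization (nvme_ctrlr_start) to a config intrhook,
 *  which runs once interrupts have been enabled.
 */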
static int
nvme_attach(device_t dev)
{
	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
	int			status;

	status = nvme_ctrlr_construct(ctrlr, dev);

	if (status != 0)
		return (status);

	/*
	 * Reset the controller twice to ensure we make a transition from
	 *  cc.en==1 to cc.en==0, since we do not know what state the
	 *  controller was left in when the boot loader handed off to the OS.
	 */
	status = nvme_ctrlr_reset(ctrlr);
	if (status != 0)
		return (status);

	status = nvme_ctrlr_reset(ctrlr);
	if (status != 0)
		return (status);

	ctrlr->config_hook.ich_func = nvme_ctrlr_start;
	ctrlr->config_hook.ich_arg = ctrlr;

	config_intrhook_establish(&ctrlr->config_hook);

	return (0);
}

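/*
 * Tear down in roughly the reverse order of attach: taskqueue, namespace
 *  and controller character devices, I/O and admin queue pairs, and
 *  finally the memory, interrupt, and MSI-X resources.
 */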
static int
nvme_detach (device_t dev)
{
	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
	struct nvme_namespace	*ns;
	int			i;

	if (ctrlr->taskqueue) {
		taskqueue_drain(ctrlr->taskqueue, &ctrlr->task);
		taskqueue_free(ctrlr->taskqueue);
	}

	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
		ns = &ctrlr->ns[i];
		if (ns->cdev)
			destroy_dev(ns->cdev);
	}

	if (ctrlr->cdev)
		destroy_dev(ctrlr->cdev);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
	}

	free(ctrlr->ioq, M_NVME);

	nvme_admin_qpair_destroy(&ctrlr->adminq);

	if (ctrlr->resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->resource_id, ctrlr->resource);
	}

#ifdef CHATHAM2
	if (ctrlr->chatham_resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->chatham_resource_id, ctrlr->chatham_resource);
	}
#endif

	if (ctrlr->tag)
		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);

	if (ctrlr->res)
		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(ctrlr->res), ctrlr->res);

	if (ctrlr->msix_enabled)
		pci_release_msi(dev);

	return (0);
}

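/*
 * Invoke the consumer's callback once for each namespace on every
 *  attached controller.
 */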
static void
nvme_notify_consumer(struct nvme_consumer *consumer)
{
	device_t		*devlist;
	struct nvme_controller	*ctrlr;
	int			dev, ns, devcount;

	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
		return;

	for (dev = 0; dev < devcount; dev++) {
		ctrlr = DEVICE2SOFTC(devlist[dev]);
		for (ns = 0; ns < ctrlr->cdata.nn; ns++)
			(*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
	}

	free(devlist, M_TEMP);
}

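/*
 * Claim a free consumer slot and immediately notify the new consumer of
 *  all existing namespaces.  Returns NULL if no slots are available.
 */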
struct nvme_consumer *
nvme_register_consumer(nvme_consumer_cb_fn_t cb_fn, void *cb_arg)
{
	int i;

	/*
	 * TODO: add locking around consumer registration.  Not an issue
	 *  right now since we only have one nvme consumer - nvd(4).
	 */
	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
		if (nvme_consumer[i].cb_fn == NULL) {
			nvme_consumer[i].cb_fn = cb_fn;
			nvme_consumer[i].cb_arg = cb_arg;

			nvme_notify_consumer(&nvme_consumer[i]);
			return (&nvme_consumer[i]);
		}

	printf("nvme(4): consumer not registered - no slots available\n");
	return (NULL);
}

void
nvme_unregister_consumer(struct nvme_consumer *consumer)
{

	consumer->cb_fn = NULL;
	consumer->cb_arg = NULL;
}
