nvme.c revision 241659
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme.c 241659 2012-10-18 00:38:28Z jimharris $");
29
30#include <sys/param.h>
31#include <sys/bus.h>
32#include <sys/conf.h>
33#include <sys/module.h>
34
35#include <vm/uma.h>
36
37#include <dev/pci/pcireg.h>
38#include <dev/pci/pcivar.h>
39
40#include "nvme_private.h"
41
42struct nvme_consumer {
43	nvme_consumer_cb_fn_t		cb_fn;
44	void				*cb_arg;
45};
46
47struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
48
49uma_zone_t nvme_request_zone;
50
51MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
52
53static int    nvme_probe(device_t);
54static int    nvme_attach(device_t);
55static int    nvme_detach(device_t);
56
57static devclass_t nvme_devclass;
58
59static device_method_t nvme_pci_methods[] = {
60	/* Device interface */
61	DEVMETHOD(device_probe,     nvme_probe),
62	DEVMETHOD(device_attach,    nvme_attach),
63	DEVMETHOD(device_detach,    nvme_detach),
64	{ 0, 0 }
65};
66
67static driver_t nvme_pci_driver = {
68	"nvme",
69	nvme_pci_methods,
70	sizeof(struct nvme_controller),
71};
72
73DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, 0, 0);
74MODULE_VERSION(nvme, 1);
75
76static struct _pcsid
77{
78	u_int32_t   type;
79	const char  *desc;
80} pci_ids[] = {
81	{ 0x01118086,		"NVMe Controller"  },
82	{ CHATHAM_PCI_ID,	"Chatham Prototype NVMe Controller"  },
83	{ IDT_PCI_ID,		"IDT NVMe Controller"  },
84	{ 0x00000000,		NULL  }
85};
86
87static int
88nvme_probe (device_t device)
89{
90	struct _pcsid	*ep;
91	u_int32_t	type;
92
93	type = pci_get_devid(device);
94	ep = pci_ids;
95
96	while (ep->type && ep->type != type)
97		++ep;
98
99	if (ep->desc) {
100		device_set_desc(device, ep->desc);
101		return (BUS_PROBE_DEFAULT);
102	}
103
104#if defined(PCIS_STORAGE_NVM)
105	if (pci_get_class(device)    == PCIC_STORAGE &&
106	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
107	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
108		device_set_desc(device, "Generic NVMe Device");
109		return (BUS_PROBE_GENERIC);
110	}
111#endif
112
113	return (ENXIO);
114}
115
116static void
117nvme_init(void)
118{
119	nvme_request_zone = uma_zcreate("nvme_request",
120	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
121}
122
123SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
124
125static void
126nvme_uninit(void)
127{
128	uma_zdestroy(nvme_request_zone);
129}
130
131SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
132
/* MOD_LOAD hook called from nvme_modevent(); currently does nothing. */
static void
nvme_load(void)
{

	/* Intentionally empty. */
}
137
/* MOD_UNLOAD hook called from nvme_modevent(); currently does nothing. */
static void
nvme_unload(void)
{

	/* Intentionally empty. */
}
142
143static void
144nvme_shutdown(void)
145{
146	device_t		*devlist;
147	struct nvme_controller	*ctrlr;
148	union cc_register	cc;
149	union csts_register	csts;
150	int			dev, devcount;
151
152	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
153		return;
154
155	for (dev = 0; dev < devcount; dev++) {
156		/*
157		 * Only notify controller of shutdown when a real shutdown is
158		 *  in process, not when a module unload occurs.  It seems at
159		 *  least some controllers (Chatham at least) don't let you
160		 *  re-enable the controller after shutdown notification has
161		 *  been received.
162		 */
163		ctrlr = DEVICE2SOFTC(devlist[dev]);
164		cc.raw = nvme_mmio_read_4(ctrlr, cc);
165		cc.bits.shn = NVME_SHN_NORMAL;
166		nvme_mmio_write_4(ctrlr, cc, cc.raw);
167		csts.raw = nvme_mmio_read_4(ctrlr, csts);
168		while (csts.bits.shst != NVME_SHST_COMPLETE) {
169			DELAY(5);
170			csts.raw = nvme_mmio_read_4(ctrlr, csts);
171		}
172	}
173
174	free(devlist, M_TEMP);
175}
176
177static int
178nvme_modevent(module_t mod, int type, void *arg)
179{
180
181	switch (type) {
182	case MOD_LOAD:
183		nvme_load();
184		break;
185	case MOD_UNLOAD:
186		nvme_unload();
187		break;
188	case MOD_SHUTDOWN:
189		nvme_shutdown();
190		break;
191	default:
192		break;
193	}
194
195	return (0);
196}
197
198moduledata_t nvme_mod = {
199	"nvme",
200	(modeventhand_t)nvme_modevent,
201	0
202};
203
204DECLARE_MODULE(nvme, nvme_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
205
206void
207nvme_dump_command(struct nvme_command *cmd)
208{
209	printf("opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x "
210	    "mptr:%qx prp1:%qx prp2:%qx cdw:%x %x %x %x %x %x\n",
211	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
212	    cmd->rsvd2, cmd->rsvd3,
213	    (long long unsigned int)cmd->mptr,
214	    (long long unsigned int)cmd->prp1,
215	    (long long unsigned int)cmd->prp2,
216	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
217	    cmd->cdw15);
218}
219
220void
221nvme_dump_completion(struct nvme_completion *cpl)
222{
223	printf("cdw0:%08x sqhd:%04x sqid:%04x "
224	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
225	    cpl->cdw0, cpl->sqhd, cpl->sqid,
226	    cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
227	    cpl->sf_dnr);
228}
229
230void
231nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
232{
233	struct nvme_tracker 	*tr;
234	struct nvme_qpair 	*qpair;
235	uint32_t		cur_nseg;
236
237	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
238
239	tr = (struct nvme_tracker *)arg;
240	qpair = tr->qpair;
241
242	/*
243	 * Note that we specified PAGE_SIZE for alignment and max
244	 *  segment size when creating the bus dma tags.  So here
245	 *  we can safely just transfer each segment to its
246	 *  associated PRP entry.
247	 */
248	tr->req->cmd.prp1 = seg[0].ds_addr;
249
250	if (nseg == 2) {
251		tr->req->cmd.prp2 = seg[1].ds_addr;
252	} else if (nseg > 2) {
253		cur_nseg = 1;
254		tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr;
255		while (cur_nseg < nseg) {
256			tr->prp[cur_nseg-1] =
257			    (uint64_t)seg[cur_nseg].ds_addr;
258			cur_nseg++;
259		}
260	}
261
262	nvme_qpair_submit_cmd(qpair, tr);
263}
264
265struct nvme_tracker *
266nvme_allocate_tracker(struct nvme_controller *ctrlr, boolean_t is_admin,
267    struct nvme_request *req)
268{
269	struct nvme_tracker 	*tr;
270	struct nvme_qpair	*qpair;
271
272	if (is_admin) {
273		qpair = &ctrlr->adminq;
274	} else {
275		if (ctrlr->per_cpu_io_queues)
276			qpair = &ctrlr->ioq[curcpu];
277		else
278			qpair = &ctrlr->ioq[0];
279	}
280
281	tr = nvme_qpair_allocate_tracker(qpair);
282
283	if (tr == NULL)
284		return (NULL);
285
286	tr->qpair = qpair;
287	tr->req = req;
288
289	return (tr);
290}
291
292static int
293nvme_attach(device_t dev)
294{
295	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
296	int			status;
297
298	status = nvme_ctrlr_construct(ctrlr, dev);
299
300	if (status != 0)
301		return (status);
302
303	/*
304	 * Reset controller twice to ensure we do a transition from cc.en==1
305	 *  to cc.en==0.  This is because we don't really know what status
306	 *  the controller was left in when boot handed off to OS.
307	 */
308	status = nvme_ctrlr_reset(ctrlr);
309	if (status != 0)
310		return (status);
311
312	status = nvme_ctrlr_reset(ctrlr);
313	if (status != 0)
314		return (status);
315
316	ctrlr->config_hook.ich_func = nvme_ctrlr_start;
317	ctrlr->config_hook.ich_arg = ctrlr;
318
319	config_intrhook_establish(&ctrlr->config_hook);
320
321	return (0);
322}
323
324static int
325nvme_detach (device_t dev)
326{
327	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
328	struct nvme_namespace	*ns;
329	int			i;
330
331	if (ctrlr->taskqueue) {
332		taskqueue_drain(ctrlr->taskqueue, &ctrlr->task);
333		taskqueue_free(ctrlr->taskqueue);
334	}
335
336	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
337		ns = &ctrlr->ns[i];
338		if (ns->cdev)
339			destroy_dev(ns->cdev);
340	}
341
342	if (ctrlr->cdev)
343		destroy_dev(ctrlr->cdev);
344
345	for (i = 0; i < ctrlr->num_io_queues; i++) {
346		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
347	}
348
349	free(ctrlr->ioq, M_NVME);
350
351	nvme_admin_qpair_destroy(&ctrlr->adminq);
352
353	if (ctrlr->resource != NULL) {
354		bus_release_resource(dev, SYS_RES_MEMORY,
355		    ctrlr->resource_id, ctrlr->resource);
356	}
357
358#ifdef CHATHAM2
359	if (ctrlr->chatham_resource != NULL) {
360		bus_release_resource(dev, SYS_RES_MEMORY,
361		    ctrlr->chatham_resource_id, ctrlr->chatham_resource);
362	}
363#endif
364
365	if (ctrlr->tag)
366		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
367
368	if (ctrlr->res)
369		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
370		    rman_get_rid(ctrlr->res), ctrlr->res);
371
372	if (ctrlr->msix_enabled)
373		pci_release_msi(dev);
374
375	return (0);
376}
377
378static void
379nvme_notify_consumer(struct nvme_consumer *consumer)
380{
381	device_t		*devlist;
382	struct nvme_controller	*ctrlr;
383	int			dev, ns, devcount;
384
385	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
386		return;
387
388	for (dev = 0; dev < devcount; dev++) {
389		ctrlr = DEVICE2SOFTC(devlist[dev]);
390		for (ns = 0; ns < ctrlr->cdata.nn; ns++)
391			(*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
392	}
393
394	free(devlist, M_TEMP);
395}
396
397struct nvme_consumer *
398nvme_register_consumer(nvme_consumer_cb_fn_t cb_fn, void *cb_arg)
399{
400	int i;
401
402	/*
403	 * TODO: add locking around consumer registration.  Not an issue
404	 *  right now since we only have one nvme consumer - nvd(4).
405	 */
406	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
407		if (nvme_consumer[i].cb_fn == NULL) {
408			nvme_consumer[i].cb_fn = cb_fn;
409			nvme_consumer[i].cb_arg = cb_arg;
410
411			nvme_notify_consumer(&nvme_consumer[i]);
412			return (&nvme_consumer[i]);
413		}
414
415	printf("nvme(4): consumer not registered - no slots available\n");
416	return (NULL);
417}
418
419void
420nvme_unregister_consumer(struct nvme_consumer *consumer)
421{
422
423	consumer->cb_fn = NULL;
424	consumer->cb_arg = NULL;
425}
426
427